Skip to content

Commit ca2a87f

Browse files
jgstern-agent authored and jgstern committed
fix(linkers,supply-chain): cobra package-level vars and Gradle workspace detection
Two fixes:

1. go_cobra_linker (WI-lihih): When a cobra.Command literal is inside a package-level var declaration (the most common Go cobra pattern), fall back to the enclosing variable symbol as the edge source. Previously these were silently skipped because find_enclosing_symbol only looked for function/method/class/module kinds.

2. supply_chain (WI-zizuf): detect_package_roots() now parses settings.gradle / settings.gradle.kts include directives to detect Gradle multi-project subprojects. Fixes degenerate tier distribution on Gradle monorepos (e.g. Kafka) where all files were classified as first_party because no workspace roots were detected.

Signed-off-by: jgstern-agent <josh-agent@iterabloom.com>
1 parent bf84616 commit ca2a87f

6 files changed

Lines changed: 294 additions & 15 deletions

File tree

.ci/affected-tests.txt

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,69 @@
11
# Test selection manifest
2-
# Generated by smart-test at 2026-04-12T03:41:33-04:00
2+
# Generated by smart-test at 2026-04-12T04:44:45-04:00
33
# Mode: targeted
4-
# Baseline: 2cafd5680732b1462de6628e5547e403f2f9e00e
5-
# Reason: no Python source files changed
6-
# Changed files: 7
7-
# Changed source files: 0
8-
# Selected tests: 0
4+
# Baseline: 4a043784bfa0467b7ffc73af361527d47e25da31
5+
# Changed files: 10
6+
# Changed source files: 2
7+
# Selected tests: 57
98
#
109
# === CHANGED_SOURCE_FILES ===
10+
packages/hypergumbo-core/src/hypergumbo_core/linkers/go_cobra.py
11+
packages/hypergumbo-core/src/hypergumbo_core/supply_chain.py
1112
# === SELECTED_TESTS ===
13+
packages/hypergumbo-core/tests/test_analyzer_registry.py
14+
packages/hypergumbo-core/tests/test_annotation_convention_linker.py
15+
packages/hypergumbo-core/tests/test_build_grammars.py
16+
packages/hypergumbo-core/tests/test_cli_basic.py
17+
packages/hypergumbo-core/tests/test_cli_cache.py
18+
packages/hypergumbo-core/tests/test_cli_commands.py
19+
packages/hypergumbo-core/tests/test_cli_config.py
20+
packages/hypergumbo-core/tests/test_cli_dead_code.py
21+
packages/hypergumbo-core/tests/test_cli_explain.py
22+
packages/hypergumbo-core/tests/test_cli_io_boundaries.py
23+
packages/hypergumbo-core/tests/test_cli_routes.py
24+
packages/hypergumbo-core/tests/test_cli_run_behavior_map.py
25+
packages/hypergumbo-core/tests/test_cli_search.py
26+
packages/hypergumbo-core/tests/test_cli_symbols.py
27+
packages/hypergumbo-core/tests/test_cli_test_coverage.py
28+
packages/hypergumbo-core/tests/test_cli_verify_claims.py
29+
packages/hypergumbo-core/tests/test_crypto_flow_linker.py
30+
packages/hypergumbo-core/tests/test_file_excludes.py
31+
packages/hypergumbo-core/tests/test_frameworks_flag.py
32+
packages/hypergumbo-core/tests/test_gitleaks.py
33+
packages/hypergumbo-core/tests/test_go_cobra_linker.py
34+
packages/hypergumbo-core/tests/test_ir.py
35+
packages/hypergumbo-core/tests/test_linker_filtering.py
36+
packages/hypergumbo-core/tests/test_linker_registry.py
37+
packages/hypergumbo-core/tests/test_locale.py
38+
packages/hypergumbo-core/tests/test_max_tier.py
39+
packages/hypergumbo-core/tests/test_message_dispatch_linker.py
40+
packages/hypergumbo-core/tests/test_no_first_party_priority.py
41+
packages/hypergumbo-core/tests/test_profile.py
42+
packages/hypergumbo-core/tests/test_run_behavior_map.py
43+
packages/hypergumbo-core/tests/test_schema_compliance.py
44+
packages/hypergumbo-core/tests/test_sketch.py
45+
packages/hypergumbo-core/tests/test_sketch_sanity.py
46+
packages/hypergumbo-core/tests/test_slice_tier_filter.py
47+
packages/hypergumbo-core/tests/test_stable_shape_ids.py
48+
packages/hypergumbo-core/tests/test_supply_chain.py
49+
packages/hypergumbo-core/tests/test_tree_sitter_analyzer.py
50+
packages/hypergumbo-core/tests/test_yjs_crdt_linker.py
51+
packages/hypergumbo-lang-common/tests/BRANCHES_test_dart.py
52+
packages/hypergumbo-lang-common/tests/BRANCHES_test_elixir.py
53+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_cpp.py
54+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_c.py
55+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_csharp.py
56+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_go.py
57+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_java.py
58+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_js_ts.py
59+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_kotlin.py
60+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_php.py
61+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_python_ast_analysis.py
62+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_ruby.py
63+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_rust.py
64+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_scala.py
65+
packages/hypergumbo-lang-mainstream/tests/BRANCHES_test_swift.py
66+
packages/hypergumbo-lang-mainstream/tests/test_go.py
67+
packages/hypergumbo-lang-mainstream/tests/test_html_analysis.py
68+
packages/hypergumbo-lang-mainstream/tests/test_python_ast_analysis.py
69+
packages/hypergumbo-lang-mainstream/tests/test_rust.py

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ This changelog tracks the **tool version** (package releases). The **schema vers
3434
#### Cross-language linkers
3535

3636
- **`go_memberlist` linker** (WI-lojuf): `dispatches_to` edges from `memberlist.Create` to the 12 canonical delegate methods (`NotifyMsg`, `GetBroadcasts`, `LocalState`, etc.). Used by alertmanager, consul, nomad, serf, vault.
37-
- **`go_cobra` linker** (WI-gohad): `dispatches_to` edges from `cobra.Command{…}` struct literals to handler functions in `Run`/`RunE`/`PreRun`/`PostRun` and `Persistent*` variants. Used by kubectl, helm, hugo, prometheus, terraform, docker.
37+
- **`go_cobra` linker** (WI-gohad): `dispatches_to` edges from `cobra.Command{…}` struct literals to handler functions in `Run`/`RunE`/`PreRun`/`PostRun` and `Persistent*` variants. Used by kubectl, helm, hugo, prometheus, terraform, docker. Package-level `var cmd = &cobra.Command{…}` declarations (WI-lihih) now emit edges from the var symbol when no enclosing function exists.
3838

3939
#### Behavior map
4040

@@ -48,6 +48,7 @@ This changelog tracks the **tool version** (package releases). The **schema vers
4848
- **Go closure wrapper edges** (WI-nikul): route registrations through closure wrappers (e.g. `wrapAgent(api.query)`) emit `wraps` edges. Covers Gin/Echo/Fiber and Gorilla mux/stdlib.
4949
- **Import-based framework validation**: manifest-detected frameworks cross-referenced against import edges. Test-only or unimported frameworks reclassified as `dev_frameworks`.
5050
- **Go tier 2/3 classification via go.mod** (WI-vovuk): unresolved Go external references classified using `go.mod` — direct deps tier 2, indirect/stdlib tier 3. Language-agnostic `DependencyManifest` enables future extension.
51+
- **Gradle multi-project workspace detection** (WI-zizuf): `detect_package_roots()` now parses `settings.gradle` / `settings.gradle.kts` `include` directives. Gradle subprojects are classified as workspace members, fixing degenerate tier distribution on Gradle monorepos like Kafka.
5152
- **Orchestration hub floor for symbol ranking**: functions with out-degree ≥ 20 get a minimum effective in-degree of `sqrt(out_degree) * 0.8`, preventing orchestration hubs (main, run, app) from being buried by within-file dampening.
5253
- **Event edge type weights**: `event_subscribes`/`event_publishes` raised to 0.8 (was 0.5). `dispatches_to` added at 0.6.
5354

packages/hypergumbo-core/src/hypergumbo_core/linkers/go_cobra.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@
3030
already reachable via containment.
3131
3. For each captured ``(field, identifier)`` pair, resolve
3232
``identifier`` to a Symbol via ``ctx.find_symbols_by_name`` and emit
33-
a ``dispatches_to`` edge from the enclosing function (or the
34-
cobra.Command site if no enclosing function can be found) to the
35-
handler.
33+
a ``dispatches_to`` edge from the enclosing function to the handler.
34+
When the cobra.Command literal is at package level (inside a
35+
``var … = &cobra.Command{…}`` declaration), the linker falls back
36+
to the enclosing variable symbol as the edge source.
3637
3738
Why regex and not tree-sitter
3839
-----------------------------
@@ -212,8 +213,13 @@ def go_cobra_linker(ctx: LinkerContext) -> LinkerResult:
212213

213214
# Find the enclosing function. If the cobra.Command literal
214215
# is outside any function (e.g., a package-level var init),
215-
# fall back to the first file-level symbol.
216+
# fall back to the enclosing variable symbol — the most
217+
# common Go cobra pattern is ``var rootCmd = &cobra.Command{…}``.
216218
enclosing = ctx.find_enclosing_symbol(str(file_path), line)
219+
if enclosing is None:
220+
enclosing = ctx.find_enclosing_symbol(
221+
str(file_path), line, kinds=("variable",),
222+
)
217223
if enclosing is None:
218224
continue
219225

packages/hypergumbo-core/src/hypergumbo_core/supply_chain.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ def detect_package_roots(repo_root: Path) -> set[Path]:
642642
- npm/yarn/pnpm workspaces in package.json
643643
- Cargo workspace members in Cargo.toml
644644
- Maven modules in pom.xml
645+
- Gradle subprojects in settings.gradle / settings.gradle.kts
645646
646647
Args:
647648
repo_root: Root directory of the repository
@@ -717,6 +718,33 @@ def detect_package_roots(repo_root: Path) -> set[Path]:
717718
except (OSError, ET.ParseError):
718719
pass
719720

721+
# Gradle multi-project builds (WI-zizuf)
722+
# settings.gradle or settings.gradle.kts declares subprojects via
723+
# include('mod') or include 'mod'. Colon separators (e.g., 'connect:api')
724+
# map to directory nesting (connect/api/).
725+
for settings_name in ("settings.gradle", "settings.gradle.kts"):
726+
settings_file = repo_root / settings_name
727+
if settings_file.exists():
728+
try:
729+
content = settings_file.read_text()
730+
# Match both Groovy (include 'a', 'b') and Kotlin DSL
731+
# (include("a", "b")) styles. Each include() call may
732+
# list multiple projects separated by commas.
733+
for m in re.finditer(
734+
r"""include\s*\(?\s*((?:['"]:?[^'"]*['"],?\s*)+)\)?""",
735+
content,
736+
):
737+
for name in re.findall(r"""['"][:.]?([^'"]+)['"]""", m.group(1)):
738+
if not name: # pragma: no cover - regex guarantees non-empty
739+
continue
740+
# Gradle ':' separator maps to directory '/'
741+
dir_name = name.replace(":", "/")
742+
mod_path = repo_root / dir_name
743+
if mod_path.is_dir():
744+
roots.add(mod_path)
745+
except OSError:
746+
pass
747+
720748
return roots
721749

722750

packages/hypergumbo-core/tests/test_go_cobra_linker.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -417,11 +417,58 @@ def test_package_qualified_handler_resolves_via_short_name(
417417
assert matching[0].meta is not None
418418
assert matching[0].meta.get("handler_name") == "runner.runMyCmd"
419419

420-
def test_handler_outside_any_function_yields_no_edge(
420+
def test_package_level_var_emits_edge_from_var_symbol(
421421
self, tmp_path: Path,
422422
) -> None:
423-
"""When the cobra.Command literal is at package level with no
424-
enclosing function symbol, the linker skips rather than emitting a
423+
"""When the cobra.Command literal is a package-level var declaration,
424+
the linker should emit an edge from the var symbol to the handler."""
425+
p = tmp_path / "cmd.go"
426+
p.write_text(
427+
'package main\n\n'
428+
'import "github.com/spf13/cobra"\n\n'
429+
'var rootCmd = &cobra.Command{\n'
430+
' Use: "root",\n'
431+
' RunE: rootRun,\n'
432+
'}\n\n'
433+
'func rootRun(cmd *cobra.Command, args []string) error {\n'
434+
' return nil\n'
435+
'}\n',
436+
)
437+
438+
var_sym = Symbol(
439+
id=f"go:{p}:5-8:rootCmd:variable",
440+
name="rootCmd",
441+
kind="variable",
442+
language="go",
443+
path=str(p),
444+
span=Span(start_line=5, end_line=8, start_col=0, end_col=0),
445+
)
446+
handler_sym = Symbol(
447+
id=f"go:{p}:10-12:rootRun:function",
448+
name="rootRun",
449+
kind="function",
450+
language="go",
451+
path=str(p),
452+
span=Span(start_line=10, end_line=12, start_col=0, end_col=0),
453+
)
454+
ctx = LinkerContext(
455+
repo_root=tmp_path,
456+
symbols=[var_sym, handler_sym],
457+
detected_languages={"go"},
458+
)
459+
result = go_cobra_linker(ctx)
460+
# Package-level var → edge from var symbol to handler.
461+
assert len(result.edges) == 1
462+
edge = result.edges[0]
463+
assert edge.src == var_sym.id
464+
assert edge.dst == handler_sym.id
465+
assert edge.edge_type == "dispatches_to"
466+
467+
def test_package_level_var_no_var_symbol_yields_no_edge(
468+
self, tmp_path: Path,
469+
) -> None:
470+
"""When the cobra.Command literal is at package level and no
471+
var symbol is provided, the linker skips rather than emitting a
425472
misattributed edge."""
426473
p = tmp_path / "cmd.go"
427474
p.write_text(
@@ -450,7 +497,7 @@ def test_handler_outside_any_function_yields_no_edge(
450497
detected_languages={"go"},
451498
)
452499
result = go_cobra_linker(ctx)
453-
# No enclosing function at package level → no edges.
500+
# No var symbol and no enclosing function → no edges.
454501
assert result.edges == []
455502

456503
def test_pattern_compiles(self) -> None:

packages/hypergumbo-core/tests/test_supply_chain.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,145 @@ def test_examples_lower_priority_than_workspace_source(self, tmp_path):
536536
assert lib_result.tier < example_result.tier
537537

538538

539+
class TestGradleWorkspaces:
540+
"""Gradle multi-project workspace detection (WI-zizuf)."""
541+
542+
def test_settings_gradle_includes(self, tmp_path):
543+
"""Detect Gradle subprojects from settings.gradle include directives."""
544+
settings = tmp_path / "settings.gradle"
545+
settings.write_text("include 'clients', 'core', 'streams'\n")
546+
547+
for mod in ["clients", "core", "streams"]:
548+
(tmp_path / mod).mkdir()
549+
550+
roots = detect_package_roots(tmp_path)
551+
assert tmp_path / "clients" in roots
552+
assert tmp_path / "core" in roots
553+
assert tmp_path / "streams" in roots
554+
555+
def test_settings_gradle_kts_includes(self, tmp_path):
556+
"""Detect Gradle subprojects from settings.gradle.kts."""
557+
settings = tmp_path / "settings.gradle.kts"
558+
settings.write_text('include("clients", "core")\n')
559+
560+
for mod in ["clients", "core"]:
561+
(tmp_path / mod).mkdir()
562+
563+
roots = detect_package_roots(tmp_path)
564+
assert tmp_path / "clients" in roots
565+
assert tmp_path / "core" in roots
566+
567+
def test_settings_gradle_colon_prefix(self, tmp_path):
568+
"""Handle Gradle include(':module') syntax with colon prefix."""
569+
settings = tmp_path / "settings.gradle"
570+
settings.write_text("include ':clients'\ninclude ':core'\n")
571+
572+
for mod in ["clients", "core"]:
573+
(tmp_path / mod).mkdir()
574+
575+
roots = detect_package_roots(tmp_path)
576+
assert tmp_path / "clients" in roots
577+
assert tmp_path / "core" in roots
578+
579+
def test_settings_gradle_nested_subproject(self, tmp_path):
580+
"""Handle Gradle nested subprojects like 'connect:api' → connect/api/."""
581+
settings = tmp_path / "settings.gradle"
582+
settings.write_text("include 'connect:api', 'connect:runtime'\n")
583+
584+
for mod in ["connect/api", "connect/runtime"]:
585+
(tmp_path / mod).mkdir(parents=True)
586+
587+
roots = detect_package_roots(tmp_path)
588+
assert tmp_path / "connect" / "api" in roots
589+
assert tmp_path / "connect" / "runtime" in roots
590+
591+
def test_settings_gradle_nonexistent_module(self, tmp_path):
592+
"""Gradle modules that don't exist on disk are ignored."""
593+
settings = tmp_path / "settings.gradle"
594+
settings.write_text("include 'exists', 'does-not-exist'\n")
595+
596+
(tmp_path / "exists").mkdir()
597+
roots = detect_package_roots(tmp_path)
598+
assert tmp_path / "exists" in roots
599+
assert tmp_path / "does-not-exist" not in roots
600+
601+
def test_settings_gradle_empty_or_no_includes(self, tmp_path):
602+
"""settings.gradle without include directives produces no roots."""
603+
settings = tmp_path / "settings.gradle"
604+
settings.write_text("rootProject.name = 'myapp'\n")
605+
606+
roots = detect_package_roots(tmp_path)
607+
# No Gradle includes, and no other workspace configs → empty
608+
assert roots == set()
609+
610+
def test_gradle_subproject_files_are_first_party(self, tmp_path):
611+
"""Files in Gradle subprojects are classified as first-party via workspace."""
612+
settings = tmp_path / "settings.gradle"
613+
settings.write_text("include 'clients'\n")
614+
615+
src = tmp_path / "clients" / "src" / "main" / "java" / "org"
616+
src.mkdir(parents=True)
617+
(src / "App.java").write_text("class App {}")
618+
619+
roots = detect_package_roots(tmp_path)
620+
result = classify_file(src / "App.java", tmp_path, roots)
621+
assert result.tier == Tier.FIRST_PARTY
622+
assert "clients" in result.reason
623+
624+
def test_gradle_subproject_test_dir_is_internal_dep(self, tmp_path):
625+
"""Test directories within Gradle subprojects are tier 2."""
626+
settings = tmp_path / "settings.gradle"
627+
settings.write_text("include 'clients'\n")
628+
629+
test_dir = tmp_path / "clients" / "src" / "test" / "java"
630+
test_dir.mkdir(parents=True)
631+
(test_dir / "AppTest.java").write_text("class AppTest {}")
632+
633+
roots = detect_package_roots(tmp_path)
634+
result = classify_file(test_dir / "AppTest.java", tmp_path, roots)
635+
assert result.tier == Tier.INTERNAL_DEP
636+
assert result.is_test is True
637+
638+
def test_malformed_settings_gradle(self, tmp_path):
639+
"""Malformed settings.gradle doesn't crash."""
640+
settings = tmp_path / "settings.gradle"
641+
settings.write_text("this is not valid groovy\n{{{broken")
642+
643+
roots = detect_package_roots(tmp_path)
644+
# Should not crash; may or may not find roots depending on parsing
645+
assert isinstance(roots, set)
646+
647+
def test_unreadable_settings_gradle(self, tmp_path):
648+
"""Unreadable settings.gradle doesn't crash (OSError path)."""
649+
settings = tmp_path / "settings.gradle"
650+
settings.write_text("include 'core'\n")
651+
(tmp_path / "core").mkdir()
652+
settings.chmod(0o000)
653+
654+
roots = detect_package_roots(tmp_path)
655+
# OSError on read → silently skipped, no Gradle roots detected
656+
assert tmp_path / "core" not in roots
657+
# Restore permissions so tmp_path cleanup succeeds
658+
settings.chmod(0o644)
659+
660+
def test_settings_gradle_multiple_include_lines(self, tmp_path):
661+
"""Multiple include lines are all parsed."""
662+
settings = tmp_path / "settings.gradle"
663+
settings.write_text(
664+
"include 'clients'\n"
665+
"include 'core'\n"
666+
"include 'streams'\n"
667+
)
668+
669+
for mod in ["clients", "core", "streams"]:
670+
(tmp_path / mod).mkdir()
671+
672+
roots = detect_package_roots(tmp_path)
673+
assert tmp_path / "clients" in roots
674+
assert tmp_path / "core" in roots
675+
assert tmp_path / "streams" in roots
676+
677+
539678
class TestDocCClassification:
540679
"""DocC documentation directories should be tier 2 (not first-party)."""
541680

0 commit comments

Comments
 (0)