11package com.bazel_diff.interactor
22
33import com.bazel_diff.bazel.BazelQueryService
4+ import com.bazel_diff.bazel.Module
45import com.bazel_diff.bazel.ModuleGraphParser
56import com.bazel_diff.hash.TargetHash
67import com.bazel_diff.log.Logger
@@ -53,7 +54,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
5354
5455 val impactedTargets = if (changedModules.isNotEmpty()) {
5556 logger.i { " Module changes detected - querying for targets that depend on changed modules" }
56- queryTargetsDependingOnModules(changedModules, to)
57+ queryTargetsDependingOnModules(changedModules, from, to)
5758 } else {
5859 computeSimpleImpactedTargets(from, to)
5960 }
@@ -106,7 +107,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
106107
107108 val impactedTargets = if (changedModules.isNotEmpty()) {
108109 logger.i { " Module changes detected - querying for targets that depend on changed modules" }
109- val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, to)
110+ val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, from, to)
110111 // Mark module-impacted targets with distance 0, then compute distances from there
111112 val moduleImpactedHashes = from.filterKeys { ! moduleImpactedTargets.contains(it) }
112113 computeAllDistances(moduleImpactedHashes, to, depEdges)
@@ -239,56 +240,75 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
239240 }
240241
241242 /* *
242- * Detects module changes by comparing module graphs and returns changed module keys .
243+ * Detects module changes by comparing module graphs and returns the changed Modules .
243244 *
244- * This method:
245- * 1. Parses the from and to module graphs
246- * 2. Identifies which modules changed (added, removed, or version changed)
247- * 3. Logs the changes for visibility
248- * 4. Returns the set of changed module keys
245+ * Resolves each changed key against the "to" graph first (the state we will query
246+ * against), falling back to the "from" graph for modules that were removed.
249247 *
250- * @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for starting revision
251- * @param toModuleGraphJson JSON from `bazel mod graph --output=json` for final revision
252- * @return Set of changed module keys , empty if no changes
248+ * @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for the starting revision
249+ * @param toModuleGraphJson JSON from `bazel mod graph --output=json` for the final revision
250+ * @return Set of changed Modules , empty if no changes
253251 */
254252 private fun detectChangedModules (
255253 fromModuleGraphJson : String? ,
256254 toModuleGraphJson : String?
257- ): Set <String > {
258- // If either module graph is missing, assume no changes
255+ ): Set <Module > {
259256 if (fromModuleGraphJson == null || toModuleGraphJson == null ) {
260257 return emptySet()
261258 }
262259
263- // Parse module graphs
264260 val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson)
265261 val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson)
262+ val changedKeys = moduleGraphParser.findChangedModules(fromGraph, toGraph)
266263
267- // Find changed modules
268- val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph)
269-
270- if (changedModules.isEmpty()) {
264+ if (changedKeys.isEmpty()) {
271265 logger.i { " No module changes detected" }
272- } else {
273- logger.i { " Detected ${changedModules.size} module changes: ${changedModules.joinToString(" , " )} " }
266+ return emptySet()
274267 }
275268
269+ val changedModules = changedKeys.mapNotNull { key -> toGraph[key] ? : fromGraph[key] }.toSet()
270+ logger.i { " Detected ${changedModules.size} module changes: ${changedModules.joinToString(" , " ) { it.key }} " }
276271 return changedModules
277272 }
278273
279274 /* *
280275 * Queries Bazel to find all workspace targets that depend on any changed module.
281276 *
282- * Maps every changed module to its matching bzlmod canonical repos, then issues a
283- * single `rdeps(//..., @@a//... + @@b//... + ...)` query. Bazel executes the union
284- * in one analysis pass, avoiding per-repo subprocess fan-out.
277+ * For each changed module M we compute the set of canonical repos in `allTargets`
278+ * that belong to M, then issue a single unioned
279+ * `rdeps(//..., @@<repo1>//... + @@<repo2>//... + ...)` query. Bazel executes
280+ * the union in one analysis pass, avoiding per-repo subprocess fan-out.
281+ *
282+ * Repo ownership is decided by two ordered predicates:
283+ *
284+ * 1. Tier A — root repo mapping. If `bazel mod dump_repo_mapping ""` maps
285+ * `M.apparentName` to canonical C, any repo R with R == C or R starts with
286+ * "C+" / "C~" belongs to M. Covers extension-created children (`C++ext+repo`,
287+ * `C~~ext~repo`) whose parent canonical lives in M.
288+ * 2. Tier B — name-prefix fallback. R belongs to M if R starts with
289+ * "{M.name}+" or "{M.name}~". Extension-created forms (`name++ext+repo`,
290+ * `name~~ext~repo`) are already covered by these prefixes. Handles
291+ * transitive modules absent from root's mapping and the case where
292+ * `discoverRepoMapping` failed.
285293 *
286- * @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
287- * @param allTargets Map of all targets from the final revision
294+ * Modules that match nothing (Tier C) are logged and skipped — a module with no
295+ * materialised repos in `allTargets.keys` cannot impact any hashed target, and
296+ * `computeSimpleImpactedTargets` still runs below to catch direct source changes.
297+ *
298+ * The key invariant vs. the previous implementation: we match on the parsed
299+ * canonical repo name (`label.substring(2).substringBefore("//")`), not on a
300+ * `contains` substring of the full label, so a module named "cpp" no longer
301+ * matches canonical `abseil-cpp+`.
302+ *
303+ * @param changedModules Modules identified as changed between the two graphs
304+ * @param from Starting-revision target hashes; used only to pick up labels whose
305+ * content changed independently of any module bump
306+ * @param allTargets Final-revision target hashes (the set we can query against)
288307 * @return Set of target labels that are impacted by module changes
289308 */
290309 private fun queryTargetsDependingOnModules (
291- changedModuleKeys : Set <String >,
310+ changedModules : Set <Module >,
311+ from : Map <String , TargetHash >,
292312 allTargets : Map <String , TargetHash >
293313 ): Set <String > {
294314 val queryService: BazelQueryService ? = try {
@@ -302,48 +322,108 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
302322 return allTargets.keys
303323 }
304324
305- // Map every changed module to its matching bzlmod canonical repos. A single module
306- // name can match multiple canonical repos (e.g. rules_jvm_external matches
307- // rules_jvm_external~~maven~maven, rules_jvm_external~~toolchains~...). Log per
308- // module so an operator can attribute a pathologically large impacted set back to
309- // a specific module bump.
310- val moduleRepos = mutableSetOf<String >()
311- for (moduleKey in changedModuleKeys) {
312- val moduleName = moduleKey.substringBefore(" @" )
313- val matched = allTargets.keys
314- .filter { it.startsWith(" @@" ) && it.contains(moduleName) }
315- .map { it.substring(2 ).substringBefore(" //" ) }
316- if (matched.isEmpty()) {
317- logger.w { " No external repository matched module $moduleKey " }
318- } else {
319- logger.i { " Module $moduleKey matched ${matched.size} repos: ${matched.joinToString(" , " )} " }
320- moduleRepos.addAll(matched)
325+ val repoMapping: Map <String , String > =
326+ try {
327+ runBlocking { queryService.discoverRepoMapping() }
328+ } catch (e: Exception ) {
329+ logger.w { " discoverRepoMapping failed, falling back to module-name matching: ${e.message} " }
330+ emptyMap()
331+ }
332+ // Log size so operators can distinguish "Tier A had nothing to match
333+ // against" from "Tier A matched but the module wasn't in root mapping".
334+ // `discoverRepoMapping` returns an empty map on subprocess exit != 0
335+ // without throwing, so we cannot rely on the catch block above to
336+ // surface that case.
337+ logger.i { " Discovered ${repoMapping.size} root repo mapping entries" }
338+
339+ // Parse `allTargets` into the set of canonical repo names once, instead of
340+ // rescanning every label per changed module.
341+ val canonicalRepos: Set <String > = allTargets.keys.asSequence()
342+ .filter { it.startsWith(" @@" ) }
343+ .map { it.substring(2 ).substringBefore(" //" ) }
344+ .filter { it.isNotEmpty() }
345+ .toSet()
346+
347+ // Collect the canonical repos to query once — multiple "changed modules"
348+ // often collapse to the same canonical name (e.g. `findChangedModules`
349+ // reports both `foo@1.0` removed and `foo@2.0` added, or two modules have
350+ // overlapping extension-created children). Running `rdeps` per distinct
351+ // canonical name — not per module iteration — avoids the redundant work.
352+ val reposToQuery = mutableSetOf<String >()
353+ for (module in changedModules) {
354+ logger.i { " Resolving repos for changed module: ${module.name} (key: ${module.key} )" }
355+ val moduleRepos = reposOwnedBy(module, canonicalRepos, repoMapping)
356+ if (moduleRepos.isEmpty()) {
357+ logger.w { " No external repository found for module ${module.name} " }
358+ continue
321359 }
360+ logger.i { " Found ${moduleRepos.size} repositories for module ${module.name} : ${moduleRepos.joinToString(" , " )} " }
361+ reposToQuery.addAll(moduleRepos)
322362 }
323363
324- if (moduleRepos.isEmpty()) {
325- logger.i { " No external repositories matched any changed module" }
326- return computeSimpleImpactedTargets(emptyMap(), allTargets)
327- }
328-
329- logger.i { " Querying rdeps for ${moduleRepos.size} repositories across ${changedModuleKeys.size} changed modules" }
330-
331364 val impactedTargets = mutableSetOf<String >()
332- try {
333- // Single unioned rdeps query: bazel executes the union in one analysis pass.
334- val queryExpression = " rdeps(//..., ${moduleRepos.joinToString(" + " ) { " @@$it //..." }} )"
335- val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false ) }
336- val rdepLabels = rdeps.map { it.name }.filter { ! it.startsWith(" @@" ) }
337- logger.i { " Found ${rdepLabels.size} workspace targets depending on changed modules" }
338- impactedTargets.addAll(rdepLabels)
339- } catch (e: Exception ) {
340- logger.e(e) { " Unioned rdeps query failed - conservatively marking all workspace targets impacted" }
341- impactedTargets.addAll(allTargets.keys.filter { ! it.startsWith(" @@" ) })
365+ if (reposToQuery.isNotEmpty()) {
366+ try {
367+ // Single unioned rdeps query: bazel executes the union in one analysis pass,
368+ // collapsing N × (startup + analysis) into 1 × (startup + N × analysis).
369+ val queryExpression = " rdeps(//..., ${reposToQuery.joinToString(" + " ) { " @@$it //..." }} )"
370+ logger.i { " Executing unioned rdeps query across ${reposToQuery.size} repositories" }
371+ val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false ) }
372+ val rdepLabels = rdeps.map { it.name }.filter { ! it.startsWith(" @@" ) }
373+ logger.i { " Found ${rdepLabels.size} workspace targets depending on changed modules" }
374+ impactedTargets.addAll(rdepLabels)
375+ } catch (e: Exception ) {
376+ logger.e(e) { " Unioned rdeps query failed - conservatively marking all workspace targets impacted" }
377+ impactedTargets.addAll(allTargets.keys.filter { ! it.startsWith(" @@" ) })
378+ }
342379 }
343380
344- impactedTargets.addAll(computeSimpleImpactedTargets(emptyMap(), allTargets))
381+ // Union with hash-diff results so we still surface labels whose content changed
382+ // independently of any module version bump (e.g. a source file in `//app:app`
383+ // edited in the same commit as a MODULE.bazel update). The earlier
384+ // `computeSimpleImpactedTargets(emptyMap(), allTargets)` form returned every
385+ // key in `allTargets`, which silently defeated the rdeps filtering above.
386+ val directlyChanged = computeSimpleImpactedTargets(from, allTargets)
387+ impactedTargets.addAll(directlyChanged)
345388
346389 logger.i { " Total targets impacted by module changes: ${impactedTargets.size} " }
347390 return impactedTargets
348391 }
392+
393+ /* *
394+ * Canonical repos in `canonicalRepos` that belong to `module`, using Tier A
395+ * (root repo mapping) plus Tier B (name-prefix fallback). See
396+ * [queryTargetsDependingOnModules] for the full contract.
397+ *
398+ * @param module Changed module whose owned repos we want to resolve
399+ * @param canonicalRepos Set of canonical repo names parsed from `allTargets.keys`
400+ * @param repoMapping Root module's apparent→canonical repo mapping (may be empty)
401+ * @return Canonical repos belonging to `module`, or empty if none matched (Tier C)
402+ */
403+ private fun reposOwnedBy (
404+ module : Module ,
405+ canonicalRepos : Set <String >,
406+ repoMapping : Map <String , String >
407+ ): Set <String > {
408+ val prefixes = mutableListOf<String >()
409+ val exactMatches = mutableSetOf<String >()
410+
411+ val mappedCanonical = repoMapping[module.apparentName]
412+ if (! mappedCanonical.isNullOrEmpty()) {
413+ exactMatches.add(mappedCanonical)
414+ prefixes.add(" $mappedCanonical +" )
415+ prefixes.add(" $mappedCanonical ~" )
416+ }
417+
418+ // Tier B prefixes always apply — they cover transitive modules that root's
419+ // `dump_repo_mapping` does not include, and also act as the sole source when
420+ // `discoverRepoMapping` returned empty. `++`/`~~` are subsumed by `+`/`~`
421+ // (extension-created repos start with `name+` or `name~` by definition).
422+ prefixes.add(" ${module.name} +" )
423+ prefixes.add(" ${module.name} ~" )
424+
425+ return canonicalRepos.filter { repo ->
426+ repo in exactMatches || prefixes.any { repo.startsWith(it) }
427+ }.toSet()
428+ }
349429}
0 commit comments