11package com.bazel_diff.interactor
22
33import com.bazel_diff.bazel.BazelQueryService
4+ import com.bazel_diff.bazel.Module
45import com.bazel_diff.bazel.ModuleGraphParser
56import com.bazel_diff.hash.TargetHash
67import com.bazel_diff.log.Logger
@@ -53,7 +54,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
5354
5455 val impactedTargets = if (changedModules.isNotEmpty()) {
5556 logger.i { " Module changes detected - querying for targets that depend on changed modules" }
56- queryTargetsDependingOnModules(changedModules, to)
57+ queryTargetsDependingOnModules(changedModules, from, to)
5758 } else {
5859 computeSimpleImpactedTargets(from, to)
5960 }
@@ -106,7 +107,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
106107
107108 val impactedTargets = if (changedModules.isNotEmpty()) {
108109 logger.i { " Module changes detected - querying for targets that depend on changed modules" }
109- val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, to)
110+ val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, from, to)
110111 // Mark module-impacted targets with distance 0, then compute distances from there
111112 val moduleImpactedHashes = from.filterKeys { ! moduleImpactedTargets.contains(it) }
112113 computeAllDistances(moduleImpactedHashes, to, depEdges)
@@ -239,61 +240,76 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
239240 }
240241
241242 /* *
242- * Detects module changes by comparing module graphs and returns changed module keys .
243+ * Detects module changes by comparing module graphs and returns the changed Modules .
243244 *
244- * This method:
245- * 1. Parses the from and to module graphs
246- * 2. Identifies which modules changed (added, removed, or version changed)
247- * 3. Logs the changes for visibility
248- * 4. Returns the set of changed module keys
245+ * Resolves each changed key against the "to" graph first (the state we will query
246+ * against), falling back to the "from" graph for modules that were removed.
249247 *
250- * @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for starting revision
251- * @param toModuleGraphJson JSON from `bazel mod graph --output=json` for final revision
252- * @return Set of changed module keys , empty if no changes
248+ * @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for the starting revision
249+ * @param toModuleGraphJson JSON from `bazel mod graph --output=json` for the final revision
250+ * @return Set of changed Modules , empty if no changes
253251 */
254252 private fun detectChangedModules (
255253 fromModuleGraphJson : String? ,
256254 toModuleGraphJson : String?
257- ): Set <String > {
258- // If either module graph is missing, assume no changes
255+ ): Set <Module > {
259256 if (fromModuleGraphJson == null || toModuleGraphJson == null ) {
260257 return emptySet()
261258 }
262259
263- // Parse module graphs
264260 val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson)
265261 val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson)
262+ val changedKeys = moduleGraphParser.findChangedModules(fromGraph, toGraph)
266263
267- // Find changed modules
268- val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph)
269-
270- if (changedModules.isEmpty()) {
264+ if (changedKeys.isEmpty()) {
271265 logger.i { " No module changes detected" }
272- } else {
273- logger.i { " Detected ${changedModules.size} module changes: ${changedModules.joinToString(" , " )} " }
266+ return emptySet()
274267 }
275268
269+ val changedModules = changedKeys.mapNotNull { key -> toGraph[key] ? : fromGraph[key] }.toSet()
270+ logger.i { " Detected ${changedModules.size} module changes: ${changedModules.joinToString(" , " ) { it.key }} " }
276271 return changedModules
277272 }
278273
279274 /* *
280275 * Queries Bazel to find all targets that depend on the changed modules.
281276 *
282- * This uses an efficient module-level query approach:
283- * 1. Identifies which external repository each changed module corresponds to
284- * 2. Uses `rdeps(//..., @@module~version//...)` to find workspace targets depending on each module
285- * 3. Returns the union of all impacted targets
277+ * For each changed module M we compute the set of canonical repos in `allTargets`
278+ * that belong to M, then run `rdeps(//..., @@<repo>//...)` once per canonical repo.
279+ *
280+ * Repo ownership is decided by two ordered predicates:
286281 *
287- * @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
288- * @param allTargets Map of all targets from the final revision
282+ * 1. Tier A — root repo mapping. If `bazel mod dump_repo_mapping ""` maps
283+ * `M.apparentName` to canonical C, any repo R with R == C or R starts with
284+ * "C+" / "C~" belongs to M. Covers extension-created children (`C++ext+repo`,
285+ * `C~~ext~repo`) whose parent canonical lives in M.
286+ * 2. Tier B — name-prefix fallback. R belongs to M if R starts with
287+ * "{M.name}+" or "{M.name}~". Extension-created forms (`name++ext+repo`,
288+ * `name~~ext~repo`) are already covered by these prefixes. Handles
289+ * transitive modules absent from root's mapping and the case where
290+ * `discoverRepoMapping` failed.
291+ *
292+ * Modules that match nothing (Tier C) are logged and skipped — a module with no
293+ * materialised repos in `allTargets.keys` cannot impact any hashed target, and
294+ * `computeSimpleImpactedTargets` still runs below to catch direct source changes.
295+ *
296+ * The key invariant vs. the previous implementation: we match on the parsed
297+ * canonical repo name (`label.substring(2).substringBefore("//")`), not on a
298+ * `contains` substring of the full label, so a module named "cpp" no longer
299+ * matches canonical `abseil-cpp+`.
300+ *
301+ * @param changedModules Modules identified as changed between the two graphs
302+ * @param from Starting-revision target hashes; used only to pick up labels whose
303+ * content changed independently of any module bump
304+ * @param allTargets Final-revision target hashes (the set we can query against)
289305 * @return Set of target labels that are impacted by module changes
290306 */
291307 private fun queryTargetsDependingOnModules (
292- changedModuleKeys : Set <String >,
308+ changedModules : Set <Module >,
309+ from : Map <String , TargetHash >,
293310 allTargets : Map <String , TargetHash >
294311 ): Set <String > {
295312 return try {
296- // Inject BazelQueryService if available
297313 val queryService: BazelQueryService ? = try {
298314 inject<BazelQueryService >().value
299315 } catch (e: Exception ) {
@@ -305,59 +321,113 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
305321 return allTargets.keys
306322 }
307323
308- val impactedTargets = mutableSetOf<String >()
309-
310- for (moduleKey in changedModuleKeys) {
311- // Extract module name from key (e.g., "abseil-cpp" from "abseil-cpp@20240722.0")
312- val moduleName = moduleKey.substringBefore(" @" )
313- logger.i { " Querying targets depending on module: $moduleName (key: $moduleKey )" }
314-
315- // Find the canonical repository name for this module from allTargets
316- // Bzlmod repos look like: @@abseil-cpp~20240116.2//... or @@rules_jvm_external~~maven~maven//...
317- val moduleRepos = allTargets.keys
318- .filter { it.startsWith(" @@" ) && it.contains(moduleName) }
319- .map { it.substring(2 ).substringBefore(" //" ) } // Extract repo name
320- .toSet()
321-
324+ val repoMapping: Map <String , String > =
325+ try {
326+ runBlocking { queryService.discoverRepoMapping() }
327+ } catch (e: Exception ) {
328+ logger.w { " discoverRepoMapping failed, falling back to module-name matching: ${e.message} " }
329+ emptyMap()
330+ }
331+ // Log size so operators can distinguish "Tier A had nothing to match
332+ // against" from "Tier A matched but the module wasn't in root mapping".
333+ // `discoverRepoMapping` returns an empty map on subprocess exit != 0
334+ // without throwing, so we cannot rely on the catch block above to
335+ // surface that case.
336+ logger.i { " Discovered ${repoMapping.size} root repo mapping entries" }
337+
338+ // Parse `allTargets` into the set of canonical repo names once, instead of
339+ // rescanning every label per changed module.
340+ val canonicalRepos: Set <String > = allTargets.keys.asSequence()
341+ .filter { it.startsWith(" @@" ) }
342+ .map { it.substring(2 ).substringBefore(" //" ) }
343+ .filter { it.isNotEmpty() }
344+ .toSet()
345+
346+ // Collect the canonical repos to query once — multiple "changed modules"
347+ // often collapse to the same canonical name (e.g. `findChangedModules`
348+ // reports both `foo@1.0` removed and `foo@2.0` added, or two modules have
349+ // overlapping extension-created children). Running `rdeps` per distinct
350+ // canonical name — not per module iteration — avoids the redundant work.
351+ val reposToQuery = mutableSetOf<String >()
352+ for (module in changedModules) {
353+ logger.i { " Resolving repos for changed module: ${module.name} (key: ${module.key} )" }
354+ val moduleRepos = reposOwnedBy(module, canonicalRepos, repoMapping)
322355 if (moduleRepos.isEmpty()) {
323- logger.w { " No external repository found for module $moduleName " }
356+ logger.w { " No external repository found for module ${module.name} " }
324357 continue
325358 }
359+ logger.i { " Found ${moduleRepos.size} repositories for module ${module.name} : ${moduleRepos.joinToString(" , " )} " }
360+ reposToQuery.addAll(moduleRepos)
361+ }
326362
327- logger.i { " Found ${moduleRepos.size} repositories for module $moduleName : ${moduleRepos.joinToString(" , " )} " }
328-
329- // Query workspace targets that depend on any target in the changed module repo
330- for (repoName in moduleRepos) {
331- try {
332- // Use rdeps to find all workspace targets depending on this module
333- // rdeps(universe, target_set) finds all targets in universe that depend on target_set
334- val queryExpression = " rdeps(//..., @@$repoName //...)"
335- logger.i { " Executing query: $queryExpression " }
336-
337- val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false ) }
338- val rdepLabels = rdeps.map { it.name }.filter { ! it.startsWith(" @@" ) } // Filter to workspace targets only
339-
340- logger.i { " Found ${rdepLabels.size} workspace targets depending on @@$repoName " }
341- impactedTargets.addAll(rdepLabels)
342- } catch (e: Exception ) {
343- logger.w { " Failed to query rdeps for @@$repoName : ${e.message} " }
344- logger.w { " Conservatively marking all targets as impacted for this module" }
345- // On error for this module, add all workspace targets
346- impactedTargets.addAll(allTargets.keys.filter { ! it.startsWith(" @@" ) })
347- }
363+ val impactedTargets = mutableSetOf<String >()
364+ for (repoName in reposToQuery) {
365+ try {
366+ val queryExpression = " rdeps(//..., @@$repoName //...)"
367+ logger.i { " Executing query: $queryExpression " }
368+
369+ val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false ) }
370+ val rdepLabels = rdeps.map { it.name }.filter { ! it.startsWith(" @@" ) }
371+
372+ logger.i { " Found ${rdepLabels.size} workspace targets depending on @@$repoName " }
373+ impactedTargets.addAll(rdepLabels)
374+ } catch (e: Exception ) {
375+ logger.w { " Failed to query rdeps for @@$repoName : ${e.message} " }
376+ logger.w { " Conservatively marking all targets as impacted for this module" }
377+ impactedTargets.addAll(allTargets.keys.filter { ! it.startsWith(" @@" ) })
348378 }
349379 }
350380
351- // Add directly changed targets from hash comparison (e.g., code changes)
352- val directlyChanged = computeSimpleImpactedTargets(emptyMap(), allTargets)
381+ // Union with hash-diff results so we still surface labels whose content changed
382+ // independently of any module version bump (e.g. a source file in `//app:app`
383+ // edited in the same commit as a MODULE.bazel update). The earlier
384+ // `computeSimpleImpactedTargets(emptyMap(), allTargets)` form returned every
385+ // key in `allTargets`, which silently defeated the rdeps filtering above.
386+ val directlyChanged = computeSimpleImpactedTargets(from, allTargets)
353387 impactedTargets.addAll(directlyChanged)
354388
355389 logger.i { " Total targets impacted by module changes: ${impactedTargets.size} " }
356390 impactedTargets
357391 } catch (e: Exception ) {
358392 logger.e(e) { " Error querying targets depending on modules" }
359- // On error, conservatively mark all targets as impacted
360393 allTargets.keys
361394 }
362395 }
396+
397+ /* *
398+ * Canonical repos in `canonicalRepos` that belong to `module`, using Tier A
399+ * (root repo mapping) plus Tier B (name-prefix fallback). See
400+ * [queryTargetsDependingOnModules] for the full contract.
401+ *
402+ * @param module Changed module whose owned repos we want to resolve
403+ * @param canonicalRepos Set of canonical repo names parsed from `allTargets.keys`
404+ * @param repoMapping Root module's apparent→canonical repo mapping (may be empty)
405+ * @return Canonical repos belonging to `module`, or empty if none matched (Tier C)
406+ */
407+ private fun reposOwnedBy (
408+ module : Module ,
409+ canonicalRepos : Set <String >,
410+ repoMapping : Map <String , String >
411+ ): Set <String > {
412+ val prefixes = mutableListOf<String >()
413+ val exactMatches = mutableSetOf<String >()
414+
415+ val mappedCanonical = repoMapping[module.apparentName]
416+ if (! mappedCanonical.isNullOrEmpty()) {
417+ exactMatches.add(mappedCanonical)
418+ prefixes.add(" $mappedCanonical +" )
419+ prefixes.add(" $mappedCanonical ~" )
420+ }
421+
422+ // Tier B prefixes always apply — they cover transitive modules that root's
423+ // `dump_repo_mapping` does not include, and also act as the sole source when
424+ // `discoverRepoMapping` returned empty. `++`/`~~` are subsumed by `+`/`~`
425+ // (extension-created repos start with `name+` or `name~` by definition).
426+ prefixes.add(" ${module.name} +" )
427+ prefixes.add(" ${module.name} ~" )
428+
429+ return canonicalRepos.filter { repo ->
430+ repo in exactMatches || prefixes.any { repo.startsWith(it) }
431+ }.toSet()
432+ }
363433}
0 commit comments