Skip to content

Commit a61697d

Browse files
committed
Tighten canonical-repo match in queryTargetsDependingOnModules
On MODULE.bazel-touching commits the impacted-targets path fans out one `bazel query rdeps(//..., @@<repo>//...)` subprocess per canonical repo whose label contains the changed module's name as a substring. A module named "cpp" pulled in every `abseil-cpp+//...` label; in workspaces with many bzlmod module extensions the fan-out reached thousands of serial subprocesses per `get-impacted-targets` call. Replace the `contains` scan with a two-tier predicate that matches on the parsed canonical repo name: 1. Tier A: consult `bazel mod dump_repo_mapping ""` (already implemented in BazelQueryService.discoverRepoMapping; made public for reuse). If the mapping resolves `module.apparentName` to canonical C, any repo equal to C or starting with "C+"/"C~" belongs to that module. 2. Tier B: prefix-match `module.name + "+"` / `module.name + "~"` for transitive modules that root's repo mapping does not include and when discoverRepoMapping fails. Extension-created forms (`name++ext+repo`, `name~~ext~repo`) are subsumed by these prefixes. Canonical repos to query are collected across all changed modules before running rdeps, which avoids duplicate queries when findChangedModules reports a version bump as both an add and a remove (two Module objects sharing the same canonical). Also fix a pre-existing bug in the same function: the trailing `computeSimpleImpactedTargets(emptyMap(), allTargets)` call returned `allTargets.keys`, silently unioning every target back into the rdeps result. Thread `from` through the call chain so the union uses the real hash diff. Without this, the rdeps filtering added here would be cost-only — the impacted set would still equal the full workspace. Tests: new CalculateImpactedTargetsInteractorModuleQueryTest covers canonical-plus, canonical-tilde, extension-created-via-mapping, extension-created-via-name-fallback, the "cpp must not match abseil-cpp" regression, transitive-only-via-name-fallback, ghost-module (Tier C skip), multiple-disjoint-modules, discoverRepoMapping failure, and the executeWithDistances module-query path. Output assertions guard against regression to the `emptyMap()` union bug.
1 parent 2f6b341 commit a61697d

4 files changed

Lines changed: 510 additions & 68 deletions

File tree

cli/BUILD

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ kt_jvm_test(
7272
runtime_deps = [":cli-test-lib"],
7373
)
7474

75+
kt_jvm_test(
76+
name = "CalculateImpactedTargetsInteractorModuleQueryTest",
77+
test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorModuleQueryTest",
78+
runtime_deps = [":cli-test-lib"],
79+
)
80+
7581
kt_jvm_test(
7682
name = "NormalisingPathConverterTest",
7783
test_class = "com.bazel_diff.cli.converter.NormalisingPathConverterTest",

cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,13 @@ class BazelQueryService(
349349
* `bazel mod dump_repo_mapping ""`. Returns a map of apparent name → canonical name.
350350
* Filters out internal repos (bazel_tools, _builtins, local_config_*) that aren't
351351
* relevant for dependency hashing.
352+
*
353+
* Used by both `queryBzlmodRepos` (for synthetic //external:* target generation) and
354+
* `CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules` (to resolve a
355+
* changed module's canonical repo prefix without substring matching `allTargets.keys`).
352356
*/
353357
@OptIn(ExperimentalCoroutinesApi::class)
354-
private suspend fun discoverRepoMapping(): Map<String, String> {
358+
suspend fun discoverRepoMapping(): Map<String, String> {
355359
val cmd: MutableList<String> =
356360
ArrayList<String>().apply {
357361
add(bazelPath.toString())

cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt

Lines changed: 137 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.bazel_diff.interactor
22

33
import com.bazel_diff.bazel.BazelQueryService
4+
import com.bazel_diff.bazel.Module
45
import com.bazel_diff.bazel.ModuleGraphParser
56
import com.bazel_diff.hash.TargetHash
67
import com.bazel_diff.log.Logger
@@ -53,7 +54,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
5354

5455
val impactedTargets = if (changedModules.isNotEmpty()) {
5556
logger.i { "Module changes detected - querying for targets that depend on changed modules" }
56-
queryTargetsDependingOnModules(changedModules, to)
57+
queryTargetsDependingOnModules(changedModules, from, to)
5758
} else {
5859
computeSimpleImpactedTargets(from, to)
5960
}
@@ -106,7 +107,7 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
106107

107108
val impactedTargets = if (changedModules.isNotEmpty()) {
108109
logger.i { "Module changes detected - querying for targets that depend on changed modules" }
109-
val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, to)
110+
val moduleImpactedTargets = queryTargetsDependingOnModules(changedModules, from, to)
110111
// Mark module-impacted targets with distance 0, then compute distances from there
111112
val moduleImpactedHashes = from.filterKeys { !moduleImpactedTargets.contains(it) }
112113
computeAllDistances(moduleImpactedHashes, to, depEdges)
@@ -239,61 +240,76 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
239240
}
240241

241242
/**
242-
* Detects module changes by comparing module graphs and returns changed module keys.
243+
* Detects module changes by comparing module graphs and returns the changed Modules.
243244
*
244-
* This method:
245-
* 1. Parses the from and to module graphs
246-
* 2. Identifies which modules changed (added, removed, or version changed)
247-
* 3. Logs the changes for visibility
248-
* 4. Returns the set of changed module keys
245+
* Resolves each changed key against the "to" graph first (the state we will query
246+
* against), falling back to the "from" graph for modules that were removed.
249247
*
250-
* @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for starting revision
251-
* @param toModuleGraphJson JSON from `bazel mod graph --output=json` for final revision
252-
* @return Set of changed module keys, empty if no changes
248+
* @param fromModuleGraphJson JSON from `bazel mod graph --output=json` for the starting revision
249+
* @param toModuleGraphJson JSON from `bazel mod graph --output=json` for the final revision
250+
* @return Set of changed Modules, empty if no changes
253251
*/
254252
private fun detectChangedModules(
255253
fromModuleGraphJson: String?,
256254
toModuleGraphJson: String?
257-
): Set<String> {
258-
// If either module graph is missing, assume no changes
255+
): Set<Module> {
259256
if (fromModuleGraphJson == null || toModuleGraphJson == null) {
260257
return emptySet()
261258
}
262259

263-
// Parse module graphs
264260
val fromGraph = moduleGraphParser.parseModuleGraph(fromModuleGraphJson)
265261
val toGraph = moduleGraphParser.parseModuleGraph(toModuleGraphJson)
262+
val changedKeys = moduleGraphParser.findChangedModules(fromGraph, toGraph)
266263

267-
// Find changed modules
268-
val changedModules = moduleGraphParser.findChangedModules(fromGraph, toGraph)
269-
270-
if (changedModules.isEmpty()) {
264+
if (changedKeys.isEmpty()) {
271265
logger.i { "No module changes detected" }
272-
} else {
273-
logger.i { "Detected ${changedModules.size} module changes: ${changedModules.joinToString(", ")}" }
266+
return emptySet()
274267
}
275268

269+
val changedModules = changedKeys.mapNotNull { key -> toGraph[key] ?: fromGraph[key] }.toSet()
270+
logger.i { "Detected ${changedModules.size} module changes: ${changedModules.joinToString(", ") { it.key }}" }
276271
return changedModules
277272
}
278273

279274
/**
280275
* Queries Bazel to find all targets that depend on the changed modules.
281276
*
282-
* This uses an efficient module-level query approach:
283-
* 1. Identifies which external repository each changed module corresponds to
284-
* 2. Uses `rdeps(//..., @@module~version//...)` to find workspace targets depending on each module
285-
* 3. Returns the union of all impacted targets
277+
* For each changed module M we compute the set of canonical repos in `allTargets`
278+
* that belong to M, then run `rdeps(//..., @@<repo>//...)` once per canonical repo.
279+
*
280+
* Repo ownership is decided by two ordered predicates:
286281
*
287-
* @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
288-
* @param allTargets Map of all targets from the final revision
282+
* 1. Tier A — root repo mapping. If `bazel mod dump_repo_mapping ""` maps
283+
* `M.apparentName` to canonical C, any repo R with R == C or R starts with
284+
* "C+" / "C~" belongs to M. Covers extension-created children (`C++ext+repo`,
285+
* `C~~ext~repo`) whose parent canonical lives in M.
286+
* 2. Tier B — name-prefix fallback. R belongs to M if R starts with
287+
* "{M.name}+" or "{M.name}~". Extension-created forms (`name++ext+repo`,
288+
* `name~~ext~repo`) are already covered by these prefixes. Handles
289+
* transitive modules absent from root's mapping and the case where
290+
* `discoverRepoMapping` failed.
291+
*
292+
* Modules that match nothing (Tier C) are logged and skipped — a module with no
293+
* materialised repos in `allTargets.keys` cannot impact any hashed target, and
294+
* `computeSimpleImpactedTargets` still runs below to catch direct source changes.
295+
*
296+
* The key invariant vs. the previous implementation: we match on the parsed
297+
* canonical repo name (`label.substring(2).substringBefore("//")`), not on a
298+
* `contains` substring of the full label, so a module named "cpp" no longer
299+
* matches canonical `abseil-cpp+`.
300+
*
301+
* @param changedModules Modules identified as changed between the two graphs
302+
* @param from Starting-revision target hashes; used only to pick up labels whose
303+
* content changed independently of any module bump
304+
* @param allTargets Final-revision target hashes (the set we can query against)
289305
* @return Set of target labels that are impacted by module changes
290306
*/
291307
private fun queryTargetsDependingOnModules(
292-
changedModuleKeys: Set<String>,
308+
changedModules: Set<Module>,
309+
from: Map<String, TargetHash>,
293310
allTargets: Map<String, TargetHash>
294311
): Set<String> {
295312
return try {
296-
// Inject BazelQueryService if available
297313
val queryService: BazelQueryService? = try {
298314
inject<BazelQueryService>().value
299315
} catch (e: Exception) {
@@ -305,59 +321,113 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
305321
return allTargets.keys
306322
}
307323

308-
val impactedTargets = mutableSetOf<String>()
309-
310-
for (moduleKey in changedModuleKeys) {
311-
// Extract module name from key (e.g., "abseil-cpp" from "abseil-cpp@20240722.0")
312-
val moduleName = moduleKey.substringBefore("@")
313-
logger.i { "Querying targets depending on module: $moduleName (key: $moduleKey)" }
314-
315-
// Find the canonical repository name for this module from allTargets
316-
// Bzlmod repos look like: @@abseil-cpp~20240116.2//... or @@rules_jvm_external~~maven~maven//...
317-
val moduleRepos = allTargets.keys
318-
.filter { it.startsWith("@@") && it.contains(moduleName) }
319-
.map { it.substring(2).substringBefore("//") } // Extract repo name
320-
.toSet()
321-
324+
val repoMapping: Map<String, String> =
325+
try {
326+
runBlocking { queryService.discoverRepoMapping() }
327+
} catch (e: Exception) {
328+
logger.w { "discoverRepoMapping failed, falling back to module-name matching: ${e.message}" }
329+
emptyMap()
330+
}
331+
// Log size so operators can distinguish "Tier A had nothing to match
332+
// against" from "Tier A matched but the module wasn't in root mapping".
333+
// `discoverRepoMapping` returns an empty map on subprocess exit != 0
334+
// without throwing, so we cannot rely on the catch block above to
335+
// surface that case.
336+
logger.i { "Discovered ${repoMapping.size} root repo mapping entries" }
337+
338+
// Parse `allTargets` into the set of canonical repo names once, instead of
339+
// rescanning every label per changed module.
340+
val canonicalRepos: Set<String> = allTargets.keys.asSequence()
341+
.filter { it.startsWith("@@") }
342+
.map { it.substring(2).substringBefore("//") }
343+
.filter { it.isNotEmpty() }
344+
.toSet()
345+
346+
// Collect the canonical repos to query once — multiple "changed modules"
347+
// often collapse to the same canonical name (e.g. `findChangedModules`
348+
// reports both `foo@1.0` removed and `foo@2.0` added, or two modules have
349+
// overlapping extension-created children). Running `rdeps` per distinct
350+
// canonical name — not per module iteration — avoids the redundant work.
351+
val reposToQuery = mutableSetOf<String>()
352+
for (module in changedModules) {
353+
logger.i { "Resolving repos for changed module: ${module.name} (key: ${module.key})" }
354+
val moduleRepos = reposOwnedBy(module, canonicalRepos, repoMapping)
322355
if (moduleRepos.isEmpty()) {
323-
logger.w { "No external repository found for module $moduleName" }
356+
logger.w { "No external repository found for module ${module.name}" }
324357
continue
325358
}
359+
logger.i { "Found ${moduleRepos.size} repositories for module ${module.name}: ${moduleRepos.joinToString(", ")}" }
360+
reposToQuery.addAll(moduleRepos)
361+
}
326362

327-
logger.i { "Found ${moduleRepos.size} repositories for module $moduleName: ${moduleRepos.joinToString(", ")}" }
328-
329-
// Query workspace targets that depend on any target in the changed module repo
330-
for (repoName in moduleRepos) {
331-
try {
332-
// Use rdeps to find all workspace targets depending on this module
333-
// rdeps(universe, target_set) finds all targets in universe that depend on target_set
334-
val queryExpression = "rdeps(//..., @@$repoName//...)"
335-
logger.i { "Executing query: $queryExpression" }
336-
337-
val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
338-
val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") } // Filter to workspace targets only
339-
340-
logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
341-
impactedTargets.addAll(rdepLabels)
342-
} catch (e: Exception) {
343-
logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
344-
logger.w { "Conservatively marking all targets as impacted for this module" }
345-
// On error for this module, add all workspace targets
346-
impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
347-
}
363+
val impactedTargets = mutableSetOf<String>()
364+
for (repoName in reposToQuery) {
365+
try {
366+
val queryExpression = "rdeps(//..., @@$repoName//...)"
367+
logger.i { "Executing query: $queryExpression" }
368+
369+
val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
370+
val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") }
371+
372+
logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
373+
impactedTargets.addAll(rdepLabels)
374+
} catch (e: Exception) {
375+
logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
376+
logger.w { "Conservatively marking all targets as impacted for this module" }
377+
impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
348378
}
349379
}
350380

351-
// Add directly changed targets from hash comparison (e.g., code changes)
352-
val directlyChanged = computeSimpleImpactedTargets(emptyMap(), allTargets)
381+
// Union with hash-diff results so we still surface labels whose content changed
382+
// independently of any module version bump (e.g. a source file in `//app:app`
383+
// edited in the same commit as a MODULE.bazel update). The earlier
384+
// `computeSimpleImpactedTargets(emptyMap(), allTargets)` form returned every
385+
// key in `allTargets`, which silently defeated the rdeps filtering above.
386+
val directlyChanged = computeSimpleImpactedTargets(from, allTargets)
353387
impactedTargets.addAll(directlyChanged)
354388

355389
logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
356390
impactedTargets
357391
} catch (e: Exception) {
358392
logger.e(e) { "Error querying targets depending on modules" }
359-
// On error, conservatively mark all targets as impacted
360393
allTargets.keys
361394
}
362395
}
396+
397+
/**
398+
* Canonical repos in `canonicalRepos` that belong to `module`, using Tier A
399+
* (root repo mapping) plus Tier B (name-prefix fallback). See
400+
* [queryTargetsDependingOnModules] for the full contract.
401+
*
402+
* @param module Changed module whose owned repos we want to resolve
403+
* @param canonicalRepos Set of canonical repo names parsed from `allTargets.keys`
404+
* @param repoMapping Root module's apparent→canonical repo mapping (may be empty)
405+
* @return Canonical repos belonging to `module`, or empty if none matched (Tier C)
406+
*/
407+
private fun reposOwnedBy(
408+
module: Module,
409+
canonicalRepos: Set<String>,
410+
repoMapping: Map<String, String>
411+
): Set<String> {
412+
val prefixes = mutableListOf<String>()
413+
val exactMatches = mutableSetOf<String>()
414+
415+
val mappedCanonical = repoMapping[module.apparentName]
416+
if (!mappedCanonical.isNullOrEmpty()) {
417+
exactMatches.add(mappedCanonical)
418+
prefixes.add("$mappedCanonical+")
419+
prefixes.add("$mappedCanonical~")
420+
}
421+
422+
// Tier B prefixes always apply — they cover transitive modules that root's
423+
// `dump_repo_mapping` does not include, and also act as the sole source when
424+
// `discoverRepoMapping` returned empty. `++`/`~~` are subsumed by `+`/`~`
425+
// (extension-created repos start with `name+` or `name~` by definition).
426+
prefixes.add("${module.name}+")
427+
prefixes.add("${module.name}~")
428+
429+
return canonicalRepos.filter { repo ->
430+
repo in exactMatches || prefixes.any { repo.startsWith(it) }
431+
}.toSet()
432+
}
363433
}

0 commit comments

Comments
 (0)