Skip to content

Commit e8b2f80

Browse files
committed
perf: union rdeps queries across changed modules into one subprocess
CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules previously spawned one `bazel query rdeps(//..., @@<repo>//...)` subprocess per changed-module-matching canonical repo. A single changed module can substring-match thousands of repos in a bzlmod workspace, and each subprocess pays ~2s of JVM + bazel-client-connect overhead serially. Union all matched repos into a single `rdeps(//..., @@a//... + @@b//... + ...)` query. Bazel computes the reverse-dep graph of //... once regardless of how many patterns are in the union, so runtime collapses from N × (startup + analysis) to 1 × (startup + analysis); the N-1 eliminated subprocesses are the bulk of the saving. - No command-line length concern: BazelQueryService.runQuery writes queries via --query_file, so the query string is arbitrary size. - Failure semantics: a single try/catch wraps the unioned query; on failure, fall back to marking all workspace targets impacted. The previous outer catch-all is removed - audit confirmed every throwable call now has a tight try/catch around it, and the broad catch was silently swallowing errors. - Per-module log line preserves "module X → matched N repos: ..." attribution so as to not break logging contract - Tests: adds testUnionsRdepsAcrossChangedModules with two changed modules to assert the single-query invariant.
1 parent 3657b96 commit e8b2f80

2 files changed

Lines changed: 121 additions & 65 deletions

File tree

cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt

Lines changed: 51 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,11 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
277277
}
278278

279279
/**
280-
* Queries Bazel to find all targets that depend on the changed modules.
280+
* Queries Bazel to find all workspace targets that depend on any changed module.
281281
*
282-
* This uses an efficient module-level query approach:
283-
* 1. Identifies which external repository each changed module corresponds to
284-
* 2. Uses `rdeps(//..., @@module~version//...)` to find workspace targets depending on each module
285-
* 3. Returns the union of all impacted targets
282+
* Maps every changed module to its matching bzlmod canonical repos, then issues a
283+
* single `rdeps(//..., @@a//... + @@b//... + ...)` query. Bazel executes the union
284+
* in one analysis pass, avoiding per-repo subprocess fan-out.
286285
*
287286
* @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
288287
* @param allTargets Map of all targets from the final revision
@@ -292,72 +291,59 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
292291
changedModuleKeys: Set<String>,
293292
allTargets: Map<String, TargetHash>
294293
): Set<String> {
295-
return try {
296-
// Inject BazelQueryService if available
297-
val queryService: BazelQueryService? = try {
298-
inject<BazelQueryService>().value
299-
} catch (e: Exception) {
300-
null
301-
}
302-
303-
if (queryService == null) {
304-
logger.w { "BazelQueryService not available - cannot query for module dependencies" }
305-
return allTargets.keys
306-
}
307-
308-
val impactedTargets = mutableSetOf<String>()
309-
310-
for (moduleKey in changedModuleKeys) {
311-
// Extract module name from key (e.g., "abseil-cpp" from "abseil-cpp@20240722.0")
312-
val moduleName = moduleKey.substringBefore("@")
313-
logger.i { "Querying targets depending on module: $moduleName (key: $moduleKey)" }
314-
315-
// Find the canonical repository name for this module from allTargets
316-
// Bzlmod repos look like: @@abseil-cpp~20240116.2//... or @@rules_jvm_external~~maven~maven//...
317-
val moduleRepos = allTargets.keys
318-
.filter { it.startsWith("@@") && it.contains(moduleName) }
319-
.map { it.substring(2).substringBefore("//") } // Extract repo name
320-
.toSet()
294+
val queryService: BazelQueryService? = try {
295+
inject<BazelQueryService>().value
296+
} catch (e: Exception) {
297+
null
298+
}
321299

322-
if (moduleRepos.isEmpty()) {
323-
logger.w { "No external repository found for module $moduleName" }
324-
continue
325-
}
300+
if (queryService == null) {
301+
logger.w { "BazelQueryService not available - cannot query for module dependencies" }
302+
return allTargets.keys
303+
}
326304

327-
logger.i { "Found ${moduleRepos.size} repositories for module $moduleName: ${moduleRepos.joinToString(", ")}" }
328-
329-
// Query workspace targets that depend on any target in the changed module repo
330-
for (repoName in moduleRepos) {
331-
try {
332-
// Use rdeps to find all workspace targets depending on this module
333-
// rdeps(universe, target_set) finds all targets in universe that depend on target_set
334-
val queryExpression = "rdeps(//..., @@$repoName//...)"
335-
logger.i { "Executing query: $queryExpression" }
336-
337-
val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
338-
val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") } // Filter to workspace targets only
339-
340-
logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
341-
impactedTargets.addAll(rdepLabels)
342-
} catch (e: Exception) {
343-
logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
344-
logger.w { "Conservatively marking all targets as impacted for this module" }
345-
// On error for this module, add all workspace targets
346-
impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
347-
}
348-
}
305+
// Map every changed module to its matching bzlmod canonical repos. A single module
306+
// name can match multiple canonical repos (e.g. rules_jvm_external matches
307+
// rules_jvm_external~~maven~maven, rules_jvm_external~~toolchains~...). Log per
308+
// module so an operator can attribute a pathologically large impacted set back to
309+
// a specific module bump.
310+
val moduleRepos = mutableSetOf<String>()
311+
for (moduleKey in changedModuleKeys) {
312+
val moduleName = moduleKey.substringBefore("@")
313+
val matched = allTargets.keys
314+
.filter { it.startsWith("@@") && it.contains(moduleName) }
315+
.map { it.substring(2).substringBefore("//") }
316+
if (matched.isEmpty()) {
317+
logger.w { "No external repository matched module $moduleKey" }
318+
} else {
319+
logger.i { "Module $moduleKey matched ${matched.size} repos: ${matched.joinToString(", ")}" }
320+
moduleRepos.addAll(matched)
349321
}
322+
}
350323

351-
// Add directly changed targets from hash comparison (e.g., code changes)
352-
val directlyChanged = computeSimpleImpactedTargets(emptyMap(), allTargets)
353-
impactedTargets.addAll(directlyChanged)
324+
if (moduleRepos.isEmpty()) {
325+
logger.i { "No external repositories matched any changed module" }
326+
return computeSimpleImpactedTargets(emptyMap(), allTargets)
327+
}
354328

355-
logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
356-
impactedTargets
329+
logger.i { "Querying rdeps for ${moduleRepos.size} repositories across ${changedModuleKeys.size} changed modules" }
330+
331+
val impactedTargets = mutableSetOf<String>()
332+
try {
333+
// Single unioned rdeps query: bazel executes the union in one analysis pass.
334+
val queryExpression = "rdeps(//..., ${moduleRepos.joinToString(" + ") { "@@$it//..." }})"
335+
val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
336+
val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") }
337+
logger.i { "Found ${rdepLabels.size} workspace targets depending on changed modules" }
338+
impactedTargets.addAll(rdepLabels)
357339
} catch (e: Exception) {
358-
logger.e(e) { "Error querying targets depending on modules" }
359-
// On error, conservatively mark all targets as impacted
360-
allTargets.keys
340+
logger.e(e) { "Unioned rdeps query failed - conservatively marking all workspace targets impacted" }
341+
impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
361342
}
343+
344+
impactedTargets.addAll(computeSimpleImpactedTargets(emptyMap(), allTargets))
345+
346+
logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
347+
return impactedTargets
362348
}
363349
}

cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,21 @@ package com.bazel_diff.interactor
22

33
import assertk.assertThat
44
import assertk.assertions.*
5+
import com.bazel_diff.bazel.BazelQueryService
6+
import com.bazel_diff.bazel.BazelTarget
57
import com.bazel_diff.hash.TargetHash
68
import com.bazel_diff.testModule
79
import java.io.StringWriter
810
import org.junit.Rule
911
import org.junit.Test
12+
import org.koin.core.context.loadKoinModules
13+
import org.koin.dsl.module
1014
import org.koin.test.KoinTest
1115
import org.koin.test.KoinTestRule
1216
import org.mockito.junit.MockitoJUnit
17+
import org.mockito.kotlin.any
18+
import org.mockito.kotlin.doAnswer
19+
import org.mockito.kotlin.mock
1320

1421
class CalculateImpactedTargetsInteractorTest : KoinTest {
1522
@get:Rule val mockitoRule = MockitoJUnit.rule()
@@ -639,4 +646,67 @@ class CalculateImpactedTargetsInteractorTest : KoinTest {
639646
// Only target1 should be impacted (hash changed) - module logic was skipped
640647
assertThat(output).containsExactly("//:target1")
641648
}
649+
650+
@Test
651+
fun testUnionsRdepsAcrossChangedModules() {
652+
// Guard against regressing to per-repo fan-out. Two changed modules matching two
653+
// canonical repos should produce a single unioned rdeps query, not two.
654+
val captured = mutableListOf<String>()
655+
val fakeQueryService: BazelQueryService = mock {
656+
onBlocking { query(any(), any()) } doAnswer {
657+
captured.add(it.getArgument(0))
658+
emptyList<BazelTarget>()
659+
}
660+
}
661+
loadKoinModules(module { single { fakeQueryService } })
662+
663+
val from = mapOf(
664+
"//:target1" to TargetHash("", "a", "a"),
665+
"@@abseil-cpp~20240116.2//:strings" to TargetHash("", "e1", "e1"),
666+
"@@aspect_bazel_lib~2.22.5//:copy_to_bin" to TargetHash("", "e2", "e2"),
667+
)
668+
val to = mapOf(
669+
"//:target1" to TargetHash("", "a", "a"),
670+
"@@abseil-cpp~20240722.0//:strings" to TargetHash("", "e1b", "e1b"),
671+
"@@aspect_bazel_lib~2.23.0//:copy_to_bin" to TargetHash("", "e2b", "e2b"),
672+
)
673+
val fromGraph = """
674+
{
675+
"key": "root", "name": "root", "version": "", "apparentName": "root",
676+
"dependencies": [
677+
{"key": "abseil-cpp@20240116.2", "name": "abseil-cpp", "version": "20240116.2", "apparentName": "abseil-cpp"},
678+
{"key": "aspect_bazel_lib@2.22.5", "name": "aspect_bazel_lib", "version": "2.22.5", "apparentName": "aspect_bazel_lib"}
679+
]
680+
}
681+
""".trimIndent()
682+
val toGraph = """
683+
{
684+
"key": "root", "name": "root", "version": "", "apparentName": "root",
685+
"dependencies": [
686+
{"key": "abseil-cpp@20240722.0", "name": "abseil-cpp", "version": "20240722.0", "apparentName": "abseil-cpp"},
687+
{"key": "aspect_bazel_lib@2.23.0", "name": "aspect_bazel_lib", "version": "2.23.0", "apparentName": "aspect_bazel_lib"}
688+
]
689+
}
690+
""".trimIndent()
691+
692+
val outputWriter = StringWriter()
693+
CalculateImpactedTargetsInteractor().execute(
694+
from = from,
695+
to = to,
696+
outputWriter = outputWriter,
697+
targetTypes = null,
698+
fromModuleGraphJson = fromGraph,
699+
toModuleGraphJson = toGraph,
700+
)
701+
702+
// Exactly one query - the whole point of this test. The per-repo loop would emit two.
703+
assertThat(captured).hasSize(1)
704+
val queryExpression = captured.single()
705+
assertThat(queryExpression).startsWith("rdeps(//..., ")
706+
assertThat(queryExpression).endsWith(")")
707+
// Both matched canonical repos must appear, joined by " + ".
708+
assertThat(queryExpression).contains("@@abseil-cpp~20240722.0//...")
709+
assertThat(queryExpression).contains("@@aspect_bazel_lib~2.23.0//...")
710+
assertThat(queryExpression).contains(" + ")
711+
}
642712
}

0 commit comments

Comments
 (0)