perf: union rdeps queries across changed modules into one subprocess

rdark · rdark · commit e8b2f802d5ff · 2026-04-28T07:56:25.000+01:00
CalculateImpactedTargetsInteractor.queryTargetsDependingOnModules
previously spawned one `bazel query rdeps(//..., @@&lt;repo&gt;//...)`
subprocess per changed-module-matching canonical repo. A single changed
module can substring-match thousands of repos in a bzlmod workspace,
and each subprocess pays ~2s of JVM + bazel-client-connect overhead
serially.

Union all matched repos into a single `rdeps(//..., @@a//... + @@b//...
+ ...)` query. Bazel computes the reverse-dep graph of //... once
regardless of how many patterns are in the union, so runtime collapses
from N × (startup + analysis) to 1 × (startup + analysis); the N-1
eliminated subprocesses are the bulk of the saving.

- No command-line length concern: BazelQueryService.runQuery writes
  queries via --query_file, so the query string is arbitrary size.
- Failure semantics: a single try/catch wraps the unioned query; on
  failure, fall back to marking all workspace targets impacted. The
  previous outer catch-all is removed - audit confirmed every throwable
  call now has a tight try/catch around it, and the broad catch was
  silently swallowing errors.
- Per-module log line preserves "module X → matched N repos: ..."
  attribution so as to not break logging contract
- Tests: adds testUnionsRdepsAcrossChangedModules with two changed
  modules to assert the single-query invariant.
diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractor.kt
@@ -277,12 +277,11 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
   }
 
   /**
-   * Queries Bazel to find all targets that depend on the changed modules.
+   * Queries Bazel to find all workspace targets that depend on any changed module.
    *
-   * This uses an efficient module-level query approach:
-   * 1. Identifies which external repository each changed module corresponds to
-   * 2. Uses `rdeps(//..., @@module~version//...)` to find workspace targets depending on each module
-   * 3. Returns the union of all impacted targets
+   * Maps every changed module to its matching bzlmod canonical repos, then issues a
+   * single `rdeps(//..., @@a//... + @@b//... + ...)` query. Bazel executes the union
+   * in one analysis pass, avoiding per-repo subprocess fan-out.
    *
    * @param changedModuleKeys Set of changed module keys (e.g., "abseil-cpp@20240722.0")
    * @param allTargets Map of all targets from the final revision
@@ -292,72 +291,59 @@ class CalculateImpactedTargetsInteractor : KoinComponent {
       changedModuleKeys: Set<String>,
       allTargets: Map<String, TargetHash>
   ): Set<String> {
-    return try {
-      // Inject BazelQueryService if available
-      val queryService: BazelQueryService? = try {
-        inject<BazelQueryService>().value
-      } catch (e: Exception) {
-        null
-      }
-
-      if (queryService == null) {
-        logger.w { "BazelQueryService not available - cannot query for module dependencies" }
-        return allTargets.keys
-      }
-
-      val impactedTargets = mutableSetOf<String>()
-
-      for (moduleKey in changedModuleKeys) {
-        // Extract module name from key (e.g., "abseil-cpp" from "abseil-cpp@20240722.0")
-        val moduleName = moduleKey.substringBefore("@")
-        logger.i { "Querying targets depending on module: $moduleName (key: $moduleKey)" }
-
-        // Find the canonical repository name for this module from allTargets
-        // Bzlmod repos look like: @@abseil-cpp~20240116.2//... or @@rules_jvm_external~~maven~maven//...
-        val moduleRepos = allTargets.keys
-            .filter { it.startsWith("@@") && it.contains(moduleName) }
-            .map { it.substring(2).substringBefore("//") } // Extract repo name
-            .toSet()
+    val queryService: BazelQueryService? = try {
+      inject<BazelQueryService>().value
+    } catch (e: Exception) {
+      null
+    }
 
-        if (moduleRepos.isEmpty()) {
-          logger.w { "No external repository found for module $moduleName" }
-          continue
-        }
+    if (queryService == null) {
+      logger.w { "BazelQueryService not available - cannot query for module dependencies" }
+      return allTargets.keys
+    }
 
-        logger.i { "Found ${moduleRepos.size} repositories for module $moduleName: ${moduleRepos.joinToString(", ")}" }
-
-        // Query workspace targets that depend on any target in the changed module repo
-        for (repoName in moduleRepos) {
-          try {
-            // Use rdeps to find all workspace targets depending on this module
-            // rdeps(universe, target_set) finds all targets in universe that depend on target_set
-            val queryExpression = "rdeps(//..., @@$repoName//...)"
-            logger.i { "Executing query: $queryExpression" }
-
-            val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
-            val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") } // Filter to workspace targets only
-
-            logger.i { "Found ${rdepLabels.size} workspace targets depending on @@$repoName" }
-            impactedTargets.addAll(rdepLabels)
-          } catch (e: Exception) {
-            logger.w { "Failed to query rdeps for @@$repoName: ${e.message}" }
-            logger.w { "Conservatively marking all targets as impacted for this module" }
-            // On error for this module, add all workspace targets
-            impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
-          }
-        }
+    // Map every changed module to its matching bzlmod canonical repos. A single module
+    // name can match multiple canonical repos (e.g. rules_jvm_external matches
+    // rules_jvm_external~~maven~maven, rules_jvm_external~~toolchains~...). Log per
+    // module so an operator can attribute a pathologically large impacted set back to
+    // a specific module bump.
+    val moduleRepos = mutableSetOf<String>()
+    for (moduleKey in changedModuleKeys) {
+      val moduleName = moduleKey.substringBefore("@")
+      val matched = allTargets.keys
+          .filter { it.startsWith("@@") && it.contains(moduleName) }
+          .map { it.substring(2).substringBefore("//") }
+      if (matched.isEmpty()) {
+        logger.w { "No external repository matched module $moduleKey" }
+      } else {
+        logger.i { "Module $moduleKey matched ${matched.size} repos: ${matched.joinToString(", ")}" }
+        moduleRepos.addAll(matched)
       }
+    }
 
-      // Add directly changed targets from hash comparison (e.g., code changes)
-      val directlyChanged = computeSimpleImpactedTargets(emptyMap(), allTargets)
-      impactedTargets.addAll(directlyChanged)
+    if (moduleRepos.isEmpty()) {
+      logger.i { "No external repositories matched any changed module" }
+      return computeSimpleImpactedTargets(emptyMap(), allTargets)
+    }
 
-      logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
-      impactedTargets
+    logger.i { "Querying rdeps for ${moduleRepos.size} repositories across ${changedModuleKeys.size} changed modules" }
+
+    val impactedTargets = mutableSetOf<String>()
+    try {
+      // Single unioned rdeps query: bazel executes the union in one analysis pass.
+      val queryExpression = "rdeps(//..., ${moduleRepos.joinToString(" + ") { "@@$it//..." }})"
+      val rdeps = runBlocking { queryService.query(queryExpression, useCquery = false) }
+      val rdepLabels = rdeps.map { it.name }.filter { !it.startsWith("@@") }
+      logger.i { "Found ${rdepLabels.size} workspace targets depending on changed modules" }
+      impactedTargets.addAll(rdepLabels)
     } catch (e: Exception) {
-      logger.e(e) { "Error querying targets depending on modules" }
-      // On error, conservatively mark all targets as impacted
-      allTargets.keys
+      logger.e(e) { "Unioned rdeps query failed - conservatively marking all workspace targets impacted" }
+      impactedTargets.addAll(allTargets.keys.filter { !it.startsWith("@@") })
     }
+
+    impactedTargets.addAll(computeSimpleImpactedTargets(emptyMap(), allTargets))
+
+    logger.i { "Total targets impacted by module changes: ${impactedTargets.size}" }
+    return impactedTargets
   }
 }
diff --git a/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt b/cli/src/test/kotlin/com/bazel_diff/interactor/CalculateImpactedTargetsInteractorTest.kt
@@ -2,14 +2,21 @@ package com.bazel_diff.interactor
 
 import assertk.assertThat
 import assertk.assertions.*
+import com.bazel_diff.bazel.BazelQueryService
+import com.bazel_diff.bazel.BazelTarget
 import com.bazel_diff.hash.TargetHash
 import com.bazel_diff.testModule
 import java.io.StringWriter
 import org.junit.Rule
 import org.junit.Test
+import org.koin.core.context.loadKoinModules
+import org.koin.dsl.module
 import org.koin.test.KoinTest
 import org.koin.test.KoinTestRule
 import org.mockito.junit.MockitoJUnit
+import org.mockito.kotlin.any
+import org.mockito.kotlin.doAnswer
+import org.mockito.kotlin.mock
 
 class CalculateImpactedTargetsInteractorTest : KoinTest {
   @get:Rule val mockitoRule = MockitoJUnit.rule()
@@ -639,4 +646,67 @@ class CalculateImpactedTargetsInteractorTest : KoinTest {
     // Only target1 should be impacted (hash changed) - module logic was skipped
     assertThat(output).containsExactly("//:target1")
   }
+
+  @Test
+  fun testUnionsRdepsAcrossChangedModules() {
+    // Guard against regressing to per-repo fan-out. Two changed modules matching two
+    // canonical repos should produce a single unioned rdeps query, not two.
+    val captured = mutableListOf<String>()
+    val fakeQueryService: BazelQueryService = mock {
+      onBlocking { query(any(), any()) } doAnswer {
+        captured.add(it.getArgument(0))
+        emptyList<BazelTarget>()
+      }
+    }
+    loadKoinModules(module { single { fakeQueryService } })
+
+    val from = mapOf(
+        "//:target1" to TargetHash("", "a", "a"),
+        "@@abseil-cpp~20240116.2//:strings" to TargetHash("", "e1", "e1"),
+        "@@aspect_bazel_lib~2.22.5//:copy_to_bin" to TargetHash("", "e2", "e2"),
+    )
+    val to = mapOf(
+        "//:target1" to TargetHash("", "a", "a"),
+        "@@abseil-cpp~20240722.0//:strings" to TargetHash("", "e1b", "e1b"),
+        "@@aspect_bazel_lib~2.23.0//:copy_to_bin" to TargetHash("", "e2b", "e2b"),
+    )
+    val fromGraph = """
+      {
+        "key": "root", "name": "root", "version": "", "apparentName": "root",
+        "dependencies": [
+          {"key": "abseil-cpp@20240116.2", "name": "abseil-cpp", "version": "20240116.2", "apparentName": "abseil-cpp"},
+          {"key": "aspect_bazel_lib@2.22.5", "name": "aspect_bazel_lib", "version": "2.22.5", "apparentName": "aspect_bazel_lib"}
+        ]
+      }
+    """.trimIndent()
+    val toGraph = """
+      {
+        "key": "root", "name": "root", "version": "", "apparentName": "root",
+        "dependencies": [
+          {"key": "abseil-cpp@20240722.0", "name": "abseil-cpp", "version": "20240722.0", "apparentName": "abseil-cpp"},
+          {"key": "aspect_bazel_lib@2.23.0", "name": "aspect_bazel_lib", "version": "2.23.0", "apparentName": "aspect_bazel_lib"}
+        ]
+      }
+    """.trimIndent()
+
+    val outputWriter = StringWriter()
+    CalculateImpactedTargetsInteractor().execute(
+        from = from,
+        to = to,
+        outputWriter = outputWriter,
+        targetTypes = null,
+        fromModuleGraphJson = fromGraph,
+        toModuleGraphJson = toGraph,
+    )
+
+    // Exactly one query - the whole point of this test. The per-repo loop would emit two.
+    assertThat(captured).hasSize(1)
+    val queryExpression = captured.single()
+    assertThat(queryExpression).startsWith("rdeps(//..., ")
+    assertThat(queryExpression).endsWith(")")
+    // Both matched canonical repos must appear, joined by " + ".
+    assertThat(queryExpression).contains("@@abseil-cpp~20240722.0//...")
+    assertThat(queryExpression).contains("@@aspect_bazel_lib~2.23.0//...")
+    assertThat(queryExpression).contains(" + ")
+  }
 }