
Commit 766c068

Adds option to configure max batch size in readManyByPartitionKeys (#48930)
* Adds option to configure max batch size in readManyByPartitionKeys
* Update changelogs
* Update sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyByPartitionKeysRequestOptions.java
* Addressed code review comments

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

1 parent: 5ff691f

17 files changed: 216 additions & 13 deletions


sdk/cosmos/azure-cosmos-spark_3-3_2-12/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -5,6 +5,7 @@
 #### Features Added
 * Added new `CosmosItemsDataSource.readManyByPartitionKeys` Spark function to execute bulk queries by a list of pk-values with better efficiency. Configure null handling via `spark.cosmos.read.readManyByPk.nullHandling` - default `Null` treats a null PK column as JSON null (`addNullValue`), `None` treats it as `PartitionKey.NONE` (`addNoneValue` / `NOT IS_DEFINED`). These route to different physical partitions - picking the wrong mode silently returns zero rows. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
 * Added Spark config `spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch` (default `1`) to bound the per-task prefetch parallelism the SDK uses inside `readManyByPartitionKeys`. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
+* Added Spark config `spark.cosmos.read.readManyByPk.maxBatchSize` (default `100`) to set the max. number of partition keys used for a single batch. See [PR 48930](https://github.com/Azure/azure-sdk-for-java/pull/48930)

 #### Breaking Changes
```
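The changelog entries above describe three related knobs: the null-handling mode plus two tuning settings. As a minimal, purely illustrative sketch (the values shown are examples, not the defaults; endpoint and container settings are omitted), a caller might assemble them into a Spark read-options map like this:

```scala
// Hypothetical helper - assembles the readManyByPk-related Spark configs
// added by PR 48801 and PR 48930 into an options map. The numeric values
// passed in are illustrative, not defaults.
object ReadManyConfigExample {
  def readOptions(maxBatchSize: Int, maxPrefetch: Int): Map[String, String] = Map(
    "spark.cosmos.read.readManyByPk.nullHandling" -> "Null",
    "spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch" -> maxPrefetch.toString,
    "spark.cosmos.read.readManyByPk.maxBatchSize" -> maxBatchSize.toString
  )

  def main(args: Array[String]): Unit =
    readOptions(maxBatchSize = 50, maxPrefetch = 4)
      .toSeq.sorted
      .foreach { case (k, v) => println(s"$k=$v") }
}
```

In a real job this map would be merged with the usual account/container settings and passed to the `cosmos.oltp` reader.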

sdk/cosmos/azure-cosmos-spark_3-4_2-12/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -5,6 +5,7 @@
 #### Features Added
 * Added new `CosmosItemsDataSource.readManyByPartitionKeys` Spark function to execute bulk queries by a list of pk-values with better efficiency. Configure null handling via `spark.cosmos.read.readManyByPk.nullHandling` - default `Null` treats a null PK column as JSON null (`addNullValue`), `None` treats it as `PartitionKey.NONE` (`addNoneValue` / `NOT IS_DEFINED`). These route to different physical partitions - picking the wrong mode silently returns zero rows. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
 * Added Spark config `spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch` (default `1`) to bound the per-task prefetch parallelism the SDK uses inside `readManyByPartitionKeys`. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
+* Added Spark config `spark.cosmos.read.readManyByPk.maxBatchSize` (default `100`) to set the max. number of partition keys used for a single batch. See [PR 48930](https://github.com/Azure/azure-sdk-for-java/pull/48930)

 #### Breaking Changes
```

sdk/cosmos/azure-cosmos-spark_3-5_2-12/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -5,6 +5,7 @@
 #### Features Added
 * Added new `CosmosItemsDataSource.readManyByPartitionKeys` Spark function to execute bulk queries by a list of pk-values with better efficiency. Configure null handling via `spark.cosmos.read.readManyByPk.nullHandling` - default `Null` treats a null PK column as JSON null (`addNullValue`), `None` treats it as `PartitionKey.NONE` (`addNoneValue` / `NOT IS_DEFINED`). These route to different physical partitions - picking the wrong mode silently returns zero rows. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
 * Added Spark config `spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch` (default `1`) to bound the per-task prefetch parallelism the SDK uses inside `readManyByPartitionKeys`. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
+* Added Spark config `spark.cosmos.read.readManyByPk.maxBatchSize` (default `100`) to set the max. number of partition keys used for a single batch. See [PR 48930](https://github.com/Azure/azure-sdk-for-java/pull/48930)

 #### Breaking Changes
```

sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -5,6 +5,7 @@
 #### Features Added
 * Added new `CosmosItemsDataSource.readManyByPartitionKeys` Spark function to execute bulk queries by a list of pk-values with better efficiency. Configure null handling via `spark.cosmos.read.readManyByPk.nullHandling` - default `Null` treats a null PK column as JSON null (`addNullValue`), `None` treats it as `PartitionKey.NONE` (`addNoneValue` / `NOT IS_DEFINED`). These route to different physical partitions - picking the wrong mode silently returns zero rows. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
 * Added Spark config `spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch` (default `1`) to bound the per-task prefetch parallelism the SDK uses inside `readManyByPartitionKeys`. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
+* Added Spark config `spark.cosmos.read.readManyByPk.maxBatchSize` (default `100`) to set the max. number of partition keys used for a single batch. See [PR 48930](https://github.com/Azure/azure-sdk-for-java/pull/48930)

 #### Breaking Changes
```

sdk/cosmos/azure-cosmos-spark_3/dev/README.md

Lines changed: 1 addition & 0 deletions
````diff
@@ -48,6 +48,7 @@ mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true
 mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_3-5_2-12 clean install
 mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_3-5_2-13 clean install
 mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_4-0_2-13 clean install
+mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_4-1_2-13 clean install
 ```
````

sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosConfig.scala

Lines changed: 25 additions & 7 deletions
```diff
@@ -94,6 +94,7 @@ private[spark] object CosmosConfigNames {
   val ReadManyFilteringEnabled = "spark.cosmos.read.readManyFiltering.enabled"
   val ReadManyByPkNullHandling = "spark.cosmos.read.readManyByPk.nullHandling"
   val ReadManyByPkMaxConcurrentBatchPrefetch = "spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch"
+  val ReadManyByPkMaxBatchSize = "spark.cosmos.read.readManyByPk.maxBatchSize"
   val ViewsRepositoryPath = "spark.cosmos.views.repositoryPath"
   val DiagnosticsMode = "spark.cosmos.diagnostics"
   val DiagnosticsSamplingMaxCount = "spark.cosmos.diagnostics.sampling.maxCount"
@@ -230,6 +231,7 @@ private[spark] object CosmosConfigNames {
     ReadManyFilteringEnabled,
     ReadManyByPkNullHandling,
     ReadManyByPkMaxConcurrentBatchPrefetch,
+    ReadManyByPkMaxBatchSize,
     ViewsRepositoryPath,
     DiagnosticsMode,
     DiagnosticsSamplingIntervalInSeconds,
@@ -1048,7 +1050,8 @@ private case class CosmosReadConfig(readConsistencyStrategy: ReadConsistencyStra
   readManyFilteringConfig: CosmosReadManyFilteringConfig,
   responseContinuationTokenLimitInKb: Option[Int] = None,
   readManyByPkTreatNullAsNone: Boolean = false,
-  readManyByPkMaxConcurrentBatchPrefetch: Int = 1)
+  readManyByPkMaxConcurrentBatchPrefetch: Option[Int] = None,
+  readManyByPkMaxBatchSize: Option[Int] = None)

 private object SchemaConversionModes extends Enumeration {
   type SchemaConversionMode = Value
@@ -1159,12 +1162,25 @@ private object CosmosReadConfig {
   private val ReadManyByPkMaxConcurrentBatchPrefetch = CosmosConfigEntry[Int](
     key = CosmosConfigNames.ReadManyByPkMaxConcurrentBatchPrefetch,
     mandatory = false,
-    defaultValue = Some(1),
+    defaultValue = None,
     parseFromStringFunction = value => Math.min(64, Math.max(1, value.toInt)),
     helpMessage = "The maximum number of per-physical-partition batches whose first page is prefetched " +
-      "concurrently inside a single Spark task by the SDK's readManyByPartitionKeys execution. The " +
-      "default is `1` - max is `64`, because Spark already parallelises across tasks - increase this when individual " +
-      "tasks span many physical partitions and additional intra-task prefetch is desired."
+      "concurrently inside a single Spark task by the SDK's readManyByPartitionKeys execution. When " +
+      "not set, the SDK default (`min(cpuCnt, 8)`) is used. Max is `64`, because Spark already " +
+      "parallelises across tasks - increase this when individual tasks span many physical partitions " +
+      "and additional intra-task prefetch is desired."
+  )
+
+  private val ReadManyByPkMaxBatchSize = CosmosConfigEntry[Int](
+    key = CosmosConfigNames.ReadManyByPkMaxBatchSize,
+    mandatory = false,
+    defaultValue = None,
+    parseFromStringFunction = value => Math.max(1, value.toInt),
+    helpMessage = "The maximum number of partition key values per batch query sent to a single " +
+      "physical partition. When not set, the SDK default (currently `100`, overridable via the " +
+      "`COSMOS.READ_MANY_BY_PK_MAX_BATCH_SIZE` system property / environment variable) is used. " +
+      "Increasing this value reduces the number of batches (and round-trips) but produces larger " +
+      "IN-clause queries that consume more RUs per request."
   )

   def parseCosmosReadConfig(cfg: Map[String, String]): CosmosReadConfig = {
@@ -1191,7 +1207,8 @@ private object CosmosReadConfig {
     val readManyFilteringConfig = CosmosReadManyFilteringConfig.parseCosmosReadManyFilterConfig(cfg)
     val readManyByPkNullHandling = CosmosConfigEntry.parse(cfg, ReadManyByPkNullHandling)
     val readManyByPkTreatNullAsNone = readManyByPkNullHandling.getOrElse("Null").equalsIgnoreCase("None")
-    val readManyByPkMaxConcurrentBatchPrefetch = CosmosConfigEntry.parse(cfg, ReadManyByPkMaxConcurrentBatchPrefetch).getOrElse(1)
+    val readManyByPkMaxConcurrentBatchPrefetch = CosmosConfigEntry.parse(cfg, ReadManyByPkMaxConcurrentBatchPrefetch)
+    val readManyByPkMaxBatchSize = CosmosConfigEntry.parse(cfg, ReadManyByPkMaxBatchSize)

     val effectiveReadConsistencyStrategy = if (readConsistencyStrategyOverride.getOrElse(ReadConsistencyStrategy.DEFAULT) != ReadConsistencyStrategy.DEFAULT) {
       readConsistencyStrategyOverride.get
@@ -1225,7 +1242,8 @@
       readManyFilteringConfig,
       responseContinuationTokenLimitInKb,
       readManyByPkTreatNullAsNone,
-      readManyByPkMaxConcurrentBatchPrefetch)
+      readManyByPkMaxConcurrentBatchPrefetch,
+      readManyByPkMaxBatchSize)
   }
 }
```
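Note that the two `parseFromStringFunction` lambdas encode different clamping rules: the prefetch value is clamped into the range [1, 64], while `maxBatchSize` only enforces a lower bound of 1 and has no upper clamp. A standalone sketch of that parsing semantics, mirroring the lambdas above:

```scala
// Mirrors the parseFromStringFunction lambdas of the two config entries.
// Prefetch is clamped into [1, 64]; batch size only has a lower bound of 1.
object ClampSemantics {
  def parsePrefetch(value: String): Int = Math.min(64, Math.max(1, value.toInt))
  def parseMaxBatchSize(value: String): Int = Math.max(1, value.toInt)

  def main(args: Array[String]): Unit = {
    println(parsePrefetch("100"))     // clamped down to 64
    println(parsePrefetch("0"))       // clamped up to 1
    println(parseMaxBatchSize("0"))   // clamped up to 1
    println(parseMaxBatchSize("500")) // accepted as-is, no upper clamp
  }
}
```

This matches the unit tests further down: `"100"` for prefetch parses to `Some(64)`, while `"500"` for maxBatchSize parses to `Some(500)`.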

sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadManyByPartitionKey.scala

Lines changed: 4 additions & 2 deletions
```diff
@@ -51,7 +51,8 @@ private[spark] case class ItemsPartitionReaderWithReadManyByPartitionKey

   private val readConfig = CosmosReadConfig.parseCosmosReadConfig(config)
   ThroughputControlHelper.populateThroughputControlGroupName(readManyOptionsImpl, readConfig.throughputControlConfig)
-  readManyOptions.setMaxConcurrentBatchPrefetch(readConfig.readManyByPkMaxConcurrentBatchPrefetch)
+  readConfig.readManyByPkMaxConcurrentBatchPrefetch.foreach(readManyOptions.setMaxConcurrentBatchPrefetch)
+  readConfig.readManyByPkMaxBatchSize.foreach(readManyOptions.setMaxBatchSize)

   private val operationContext = {
     assert(taskContext != null)
@@ -226,7 +227,8 @@ private[spark] case class ItemsPartitionReaderWithReadManyByPartitionKey
   // fragile if the SDK ever stopped cloning options internally.
   private val fluxFactory: String => CosmosPagedFlux[SparkRowItem] = { (continuationToken: String) =>
     val perCallOptions = new CosmosReadManyByPartitionKeysRequestOptions()
-    perCallOptions.setMaxConcurrentBatchPrefetch(readConfig.readManyByPkMaxConcurrentBatchPrefetch)
+    readConfig.readManyByPkMaxConcurrentBatchPrefetch.foreach(perCallOptions.setMaxConcurrentBatchPrefetch)
+    readConfig.readManyByPkMaxBatchSize.foreach(perCallOptions.setMaxBatchSize)
     perCallOptions.setContinuationToken(continuationToken)
     val perCallOptionsImpl = ImplementationBridgeHelpers
       .CosmosReadManyByPartitionKeysRequestOptionsHelper
```
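The `Option.foreach` pattern in this diff calls a setter only when the Spark config actually supplied a value; otherwise the request-options object is left untouched and the SDK default wins. A minimal standalone sketch of that pattern (`FakeOptions` is a stand-in for the real `CosmosReadManyByPartitionKeysRequestOptions`, not SDK API):

```scala
// Stand-in for the SDK's request-options class, for illustration only.
final class FakeOptions {
  var maxBatchSize: Option[Int] = None
  def setMaxBatchSize(v: Int): Unit = { maxBatchSize = Some(v) }
}

object ApplyIfConfigured {
  // Calls the setter only when the config supplied a value; a None config
  // leaves the options untouched so the SDK default stays in effect.
  def apply(configured: Option[Int], opts: FakeOptions): FakeOptions = {
    configured.foreach(opts.setMaxBatchSize)
    opts
  }

  def main(args: Array[String]): Unit = {
    println(apply(Some(50), new FakeOptions).maxBatchSize) // prints Some(50)
    println(apply(None, new FakeOptions).maxBatchSize)     // prints None
  }
}
```

This is why the config entries above switched from `defaultValue = Some(1)` to `defaultValue = None`: a hard-coded Spark-side default would otherwise always override the SDK's own default.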

sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala

Lines changed: 68 additions & 0 deletions
```diff
@@ -458,6 +458,8 @@ class CosmosConfigSpec extends UnitSpec with BasicLoggingTrait {
     config.readManyFilteringConfig.readManyFilteringEnabled shouldBe false
     config.readManyFilteringConfig.readManyFilterProperty shouldEqual "_itemIdentity"
     config.readManyByPkTreatNullAsNone shouldBe false
+    config.readManyByPkMaxConcurrentBatchPrefetch shouldBe None
+    config.readManyByPkMaxBatchSize shouldBe None

     userConfig = Map(
       "spark.cosmos.read.forceEventualConsistency" -> "false",
@@ -672,6 +674,72 @@ class CosmosConfigSpec extends UnitSpec with BasicLoggingTrait {
     config.readManyByPkTreatNullAsNone shouldBe true
   }

+  it should "parse readManyByPk maxBatchSize configuration" in {
+    // Default (not specified) should be None - SDK applies its own default
+    var userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false"
+    )
+    var config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxBatchSize shouldBe None
+
+    // Explicit value
+    userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false",
+      "spark.cosmos.read.readManyByPk.maxBatchSize" -> "50"
+    )
+    config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxBatchSize shouldBe Some(50)
+
+    // Value below 1 should be clamped to 1
+    userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false",
+      "spark.cosmos.read.readManyByPk.maxBatchSize" -> "0"
+    )
+    config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxBatchSize shouldBe Some(1)
+
+    // Large value should be accepted
+    userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false",
+      "spark.cosmos.read.readManyByPk.maxBatchSize" -> "500"
+    )
+    config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxBatchSize shouldBe Some(500)
+  }
+
+  it should "parse readManyByPk maxConcurrentBatchPrefetch configuration" in {
+    // Default (not specified) should be None - SDK applies its own default
+    var userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false"
+    )
+    var config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxConcurrentBatchPrefetch shouldBe None
+
+    // Explicit value
+    userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false",
+      "spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch" -> "4"
+    )
+    config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxConcurrentBatchPrefetch shouldBe Some(4)
+
+    // Value above 64 should be clamped to 64
+    userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false",
+      "spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch" -> "100"
+    )
+    config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxConcurrentBatchPrefetch shouldBe Some(64)
+
+    // Value below 1 should be clamped to 1
+    userConfig = Map(
+      "spark.cosmos.read.forceEventualConsistency" -> "false",
+      "spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch" -> "0"
+    )
+    config = CosmosReadConfig.parseCosmosReadConfig(userConfig)
+    config.readManyByPkMaxConcurrentBatchPrefetch shouldBe Some(1)
+  }
+
   it should "throw on invalid read configuration" in {
     val userConfig = Map(
       "spark.cosmos.read.schemaConversionMode" -> "not a valid value"
```

sdk/cosmos/azure-cosmos-spark_4-0_2-13/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -5,6 +5,7 @@
 #### Features Added
 * Added new `CosmosItemsDataSource.readManyByPartitionKeys` Spark function to execute bulk queries by a list of pk-values with better efficiency. Configure null handling via `spark.cosmos.read.readManyByPk.nullHandling` - default `Null` treats a null PK column as JSON null (`addNullValue`), `None` treats it as `PartitionKey.NONE` (`addNoneValue` / `NOT IS_DEFINED`). These route to different physical partitions - picking the wrong mode silently returns zero rows. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
 * Added Spark config `spark.cosmos.read.readManyByPk.maxConcurrentBatchPrefetch` (default `1`) to bound the per-task prefetch parallelism the SDK uses inside `readManyByPartitionKeys`. See [PR 48801](https://github.com/Azure/azure-sdk-for-java/pull/48801)
+* Added Spark config `spark.cosmos.read.readManyByPk.maxBatchSize` (default `100`) to set the max. number of partition keys used for a single batch. See [PR 48930](https://github.com/Azure/azure-sdk-for-java/pull/48930)

 #### Breaking Changes
```

sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ReadManyByPartitionKeyTest.java

Lines changed: 37 additions & 0 deletions
```diff
@@ -505,6 +505,43 @@ public void singlePk_readManyByPartitionKey_withRequestOptionsAndMaxConcurrentBa

         cleanupContainer(singlePkContainer);
     }
+
+    @Test(groups = {"emulator"}, timeOut = TIMEOUT)
+    public void singlePk_readManyByPartitionKey_withRequestOptionsAndMaxBatchSize() {
+        // Exercises the per-request maxBatchSize override (precedence over global default).
+        // Use batch size of 1 so every PK ends up in its own batch — verifies results
+        // are still correctly assembled from many small batches.
+        List<ObjectNode> items = createSinglePkItems("batchSzPk1", 2);
+        items.addAll(createSinglePkItems("batchSzPk2", 2));
+        items.addAll(createSinglePkItems("batchSzPk3", 2));
+
+        List<PartitionKey> pkValues = Arrays.asList(
+            new PartitionKey("batchSzPk1"),
+            new PartitionKey("batchSzPk2"),
+            new PartitionKey("batchSzPk3"));
+
+        com.azure.cosmos.models.CosmosReadManyByPartitionKeysRequestOptions options =
+            new com.azure.cosmos.models.CosmosReadManyByPartitionKeysRequestOptions();
+        options.setMaxBatchSize(1);
+
+        CosmosPagedIterable<ObjectNode> results = singlePkContainer.readManyByPartitionKeys(
+            pkValues, options, ObjectNode.class);
+        List<ObjectNode> resultList = results.stream().collect(Collectors.toList());
+
+        assertThat(resultList).hasSize(6);
+        resultList.forEach(item -> {
+            assertThat(item.get("mypk").asText()).isIn("batchSzPk1", "batchSzPk2", "batchSzPk3");
+        });
+
+        cleanupContainer(singlePkContainer);
+    }
+
+    @Test(groups = {"emulator"}, timeOut = TIMEOUT, expectedExceptions = IllegalArgumentException.class)
+    public void singlePk_readManyByPartitionKey_setMaxBatchSizeZeroThrows() {
+        com.azure.cosmos.models.CosmosReadManyByPartitionKeysRequestOptions options =
+            new com.azure.cosmos.models.CosmosReadManyByPartitionKeysRequestOptions();
+        options.setMaxBatchSize(0); // must throw IllegalArgumentException
+    }
     //endregion

     //region Continuation token tests
```
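The first test above forces `maxBatchSize` to 1 so each of the three partition keys lands in its own batch. The batch count the setting controls is a ceiling division of the PK count by the batch size; a small sketch of that arithmetic (the helper is illustrative, not SDK API):

```scala
// Illustrative helper: number of batches produced for a given PK count and
// maxBatchSize, per physical partition. Ceiling division with a lower-bound
// check mirroring the setter's rejection of values < 1.
object BatchCount {
  def batches(pkCount: Int, maxBatchSize: Int): Int = {
    require(maxBatchSize >= 1, "maxBatchSize must be a positive integer")
    (pkCount + maxBatchSize - 1) / maxBatchSize
  }

  def main(args: Array[String]): Unit = {
    println(batches(3, 1))     // 3: each PK in its own batch, as in the test
    println(batches(250, 100)) // 3: SDK default of 100 yields ceil(250/100)
  }
}
```

With the SDK default of 100, the three PKs in the test would have fit in a single batch, which is why the override to 1 is needed to exercise multi-batch assembly.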
