Skip to content

Commit 35b7619

Browse files
authored
chore: tweak CI execution memory params (#4687)
1 parent 05704a4 commit 35b7619

5 files changed

Lines changed: 40 additions & 28 deletions

File tree

.github/workflows/spark_sql_test_reusable.yml

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,9 @@ jobs:
169169
module:
170170
- {name: "catalyst", args1: "catalyst/test", args2: ""}
171171
# sql_core-* set HEAP_SIZE / METASPACE_SIZE so SparkBuild.scala caps the forked test JVM.
172-
- {name: "sql_core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest", heap: "4g", metaspace: "1g"}
173-
- {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest", heap: "4g", metaspace: "1g"}
174-
- {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest", heap: "4g", metaspace: "1g"}
172+
- {name: "sql_core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest", heap: "3g", metaspace: "1g"}
173+
- {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest", heap: "3g", metaspace: "1g"}
174+
- {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest", heap: "3g", metaspace: "1g"}
175175
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
176176
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
177177
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
@@ -223,9 +223,7 @@ jobs:
223223
if [ "${{ inputs.spark-short }}" != "4.0" ] || [ "${{ inputs.java }}" != "21" ]; then
224224
export SERIAL_SBT_TESTS=1
225225
fi
226-
# Per-row forked-test-JVM caps (read by Spark's SparkBuild.scala). Only
227-
# exported when the matrix entry sets them; rows without these fields
228-
# keep Spark's defaults (-Xmx4g, -XX:MaxMetaspaceSize=1300m).
226+
# Per-row forked-test-JVM caps (read by Spark's SparkBuild.scala).
229227
if [ -n "${{ matrix.module.heap }}" ]; then
230228
export HEAP_SIZE="${{ matrix.module.heap }}"
231229
fi
@@ -247,10 +245,23 @@ jobs:
247245
# The build-jvm job pre-compiled Spark sources and Test classes, so
248246
# SBT here only orchestrates `testOnly` — Zinc verifies "no changes"
249247
# against the unpacked apache-spark/ tree and skips compilation. We
250-
# cap SBT heap at 1536 MB (down from 3072 MB) so the freed RAM goes
248+
# cap SBT heap so the freed RAM goes
251249
# to the forked test JVM and OS/container overhead, fixing the
252250
# cgroup-OOM SIGKILLs we saw on sql_core-* under 7 GB runners.
253-
SBT_MEM: "1536"
251+
SBT_MEM: "1024"
252+
# G1GC + tuning for the SBT orchestrator JVM. -Xss4m replaces the
253+
# launcher's -Xss64m default (no compile here, deep recursion not
254+
# needed). UseStringDeduplication trims the actual heap footprint, and
255+
# MaxMetaspaceSize bounds the metaspace ceiling. ExitOnOutOfMemoryError fails fast.
256+
SBT_OPTS: >-
257+
-Xss4m
258+
-XX:+UseG1GC
259+
-XX:+UseStringDeduplication
260+
-XX:MaxMetaspaceSize=384m
261+
-XX:G1HeapRegionSize=2m
262+
-XX:InitiatingHeapOccupancyPercent=35
263+
-XX:+ParallelRefProcEnabled
264+
-XX:+ExitOnOutOfMemoryError
254265
# Mirror Spark's own JDK 21 / 25 CI workaround. apache/spark's
255266
# build_java21.yml and build_java25.yml set this same env var to
256267
# process-isolate the V1/V2 Parquet and Orc source suites because

dev/diffs/3.4.3.diff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
diff --git a/pom.xml b/pom.xml
2-
index d3544881af1..1126f287096 100644
2+
index d3544881af1..aae0ae3b27b 100644
33
--- a/pom.xml
44
+++ b/pom.xml
55
@@ -148,6 +148,8 @@
@@ -2977,7 +2977,7 @@ index dd55fcfe42c..d9a3f2df535 100644
29772977

29782978
spark.internalCreateDataFrame(withoutFilters.execute(), schema)
29792979
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
2980-
index ed2e309fa07..25b798d2c1c 100644
2980+
index ed2e309fa07..0658bfe9e12 100644
29812981
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
29822982
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
29832983
@@ -74,6 +74,20 @@ trait SharedSparkSessionBase
@@ -2996,7 +2996,7 @@ index ed2e309fa07..25b798d2c1c 100644
29962996
+ .set("spark.shuffle.manager",
29972997
+ "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
29982998
+ .set("spark.comet.exec.shuffle.enabled", "true")
2999-
+ .set("spark.comet.memoryOverhead", "10g")
2999+
+ .set("spark.comet.memoryOverhead", "2g")
30003000
+ }
30013001
conf.set(
30023002
StaticSQLConf.WAREHOUSE_PATH,

dev/diffs/3.5.8.diff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
diff --git a/pom.xml b/pom.xml
2-
index edd2ad57880..15a0947abf4 100644
2+
index edd2ad57880..a47b7dec672 100644
33
--- a/pom.xml
44
+++ b/pom.xml
55
@@ -152,6 +152,8 @@
@@ -2926,7 +2926,7 @@ index e937173a590..263934fbe7b 100644
29262926

29272927
spark.internalCreateDataFrame(withoutFilters.execute(), schema)
29282928
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
2929-
index ed2e309fa07..25b798d2c1c 100644
2929+
index ed2e309fa07..0658bfe9e12 100644
29302930
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
29312931
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
29322932
@@ -74,6 +74,20 @@ trait SharedSparkSessionBase
@@ -2945,7 +2945,7 @@ index ed2e309fa07..25b798d2c1c 100644
29452945
+ .set("spark.shuffle.manager",
29462946
+ "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
29472947
+ .set("spark.comet.exec.shuffle.enabled", "true")
2948-
+ .set("spark.comet.memoryOverhead", "10g")
2948+
+ .set("spark.comet.memoryOverhead", "2g")
29492949
+ }
29502950
conf.set(
29512951
StaticSQLConf.WAREHOUSE_PATH,

dev/diffs/4.0.2.diff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ index 6c51bd4ff2e..e72ec1d26e2 100644
3939
withSpark(sc) { sc =>
4040
TestUtils.waitUntilExecutorsUp(sc, 2, 60000)
4141
diff --git a/pom.xml b/pom.xml
42-
index 252cfdf9073..64e899efe6b 100644
42+
index 252cfdf9073..60cb9dcb7cf 100644
4343
--- a/pom.xml
4444
+++ b/pom.xml
4545
@@ -148,6 +148,8 @@
@@ -3590,7 +3590,7 @@ index f0f3f94b811..b7d18771314 100644
35903590

35913591
spark.internalCreateDataFrame(withoutFilters.execute(), schema)
35923592
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
3593-
index 245219c1756..b566f970ccd 100644
3593+
index 245219c1756..2213d438a0c 100644
35943594
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
35953595
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
35963596
@@ -75,6 +75,21 @@ trait SharedSparkSessionBase
@@ -3609,7 +3609,7 @@ index 245219c1756..b566f970ccd 100644
36093609
+ .set("spark.shuffle.manager",
36103610
+ "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
36113611
+ .set("spark.comet.exec.shuffle.enabled", "true")
3612-
+ .set("spark.comet.memoryOverhead", "10g")
3612+
+ .set("spark.comet.memoryOverhead", "2g")
36133613
+
36143614
+ }
36153615
conf.set(

dev/diffs/4.1.2.diff

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ index 6df8bc85b51..dabb75e2b75 100644
3939
withSpark(sc) { sc =>
4040
TestUtils.waitUntilExecutorsUp(sc, 2, 60000)
4141
diff --git a/pom.xml b/pom.xml
42-
index dc201151999..3e278cfb34c 100644
42+
index dc201151999..d5c08f11ded 100644
4343
--- a/pom.xml
4444
+++ b/pom.xml
4545
@@ -152,6 +152,8 @@
@@ -885,7 +885,7 @@ index 53e47f428c3..a55d8f0c161 100644
885885
assert(shuffleMergeJoins.size == 1)
886886
}
887887
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
888-
index 885512d4d19..113ae17ad9f 100644
888+
index 885512d4d19..09b1ccaed71 100644
889889
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
890890
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
891891
@@ -29,7 +29,8 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
@@ -1055,7 +1055,7 @@ index 885512d4d19..113ae17ad9f 100644
10551055
// No extra shuffle before aggregation
10561056
assert(collect(plan) { case _: ShuffleExchangeExec => true }.size === 0)
10571057
}
1058-
@@ -1501,7 +1523,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1058+
@@ -1501,7 +1526,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
10591059
val plan = sql(getAggQuery(selectExpr, joinType)).queryExecution.executedPlan
10601060
assert(collect(plan) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
10611061
// Have shuffle before aggregation
@@ -1065,7 +1065,8 @@ index 885512d4d19..113ae17ad9f 100644
10651065
}
10661066

10671067
def getJoinQuery(selectExpr: String, joinType: String): String = {
1068-
@@ -1530,9 +1556,15 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1068+
@@ -1529,10 +1555,16 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1069+
"/*+ BROADCAST(right_t) */ k1 as k0"
10691070
}
10701071
val plan = sql(getJoinQuery(selectExpr, joinType)).queryExecution.executedPlan
10711072
- assert(collect(plan) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
@@ -1084,7 +1085,7 @@ index 885512d4d19..113ae17ad9f 100644
10841085
}
10851086

10861087
// Test output ordering is not preserved
1087-
@@ -1541,9 +1567,12 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1088+
@@ -1541,9 +1573,12 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
10881089
val selectExpr = "/*+ BROADCAST(left_t) */ k1 as k0"
10891090
val plan = sql(getJoinQuery(selectExpr, joinType)).queryExecution.executedPlan
10901091
assert(collect(plan) { case _: BroadcastNestedLoopJoinExec => true }.size === 1)
@@ -1099,7 +1100,7 @@ index 885512d4d19..113ae17ad9f 100644
10991100
}
11001101

11011102
// Test singe partition
1102-
@@ -1553,7 +1582,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1103+
@@ -1553,7 +1588,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
11031104
|FROM range(0, 10, 1, 1) t1 FULL OUTER JOIN range(0, 10, 1, 1) t2
11041105
|""".stripMargin)
11051106
val plan = fullJoinDF.queryExecution.executedPlan
@@ -1109,7 +1110,7 @@ index 885512d4d19..113ae17ad9f 100644
11091110
checkAnswer(fullJoinDF, Row(100))
11101111
}
11111112
}
1112-
@@ -1626,6 +1656,9 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1113+
@@ -1626,6 +1662,9 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
11131114
Seq(semiJoinDF, antiJoinDF).foreach { df =>
11141115
assert(collect(df.queryExecution.executedPlan) {
11151116
case j: ShuffledHashJoinExec if j.ignoreDuplicatedKey == ignoreDuplicatedKey => true
@@ -1119,7 +1120,7 @@ index 885512d4d19..113ae17ad9f 100644
11191120
}.size == 1)
11201121
}
11211122
}
1122-
@@ -1670,14 +1703,20 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
1123+
@@ -1670,14 +1709,20 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
11231124

11241125
test("SPARK-43113: Full outer join with duplicate stream-side references in condition (SMJ)") {
11251126
def check(plan: SparkPlan): Unit = {
@@ -1142,7 +1143,7 @@ index 885512d4d19..113ae17ad9f 100644
11421143
}
11431144
dupStreamSideColTest("SHUFFLE_HASH", check)
11441145
}
1145-
@@ -1813,7 +1852,8 @@ class ThreadLeakInSortMergeJoinSuite
1146+
@@ -1813,7 +1858,8 @@ class ThreadLeakInSortMergeJoinSuite
11461147
sparkConf.set(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD, 20))
11471148
}
11481149

@@ -3849,7 +3850,7 @@ index f0f3f94b811..b7d18771314 100644
38493850

38503851
spark.internalCreateDataFrame(withoutFilters.execute(), schema)
38513852
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
3852-
index 720b13b812e..71b20c79a12 100644
3853+
index 720b13b812e..d08c2548ffa 100644
38533854
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
38543855
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
38553856
@@ -98,6 +98,21 @@ trait SharedSparkSessionBase
@@ -3868,7 +3869,7 @@ index 720b13b812e..71b20c79a12 100644
38683869
+ .set("spark.shuffle.manager",
38693870
+ "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
38703871
+ .set("spark.comet.exec.shuffle.enabled", "true")
3871-
+ .set("spark.comet.memoryOverhead", "10g")
3872+
+ .set("spark.comet.memoryOverhead", "2g")
38723873
+
38733874
+ }
38743875
conf.set(

0 commit comments

Comments
 (0)