@@ -296,6 +296,28 @@ object CometConf extends ShimCometConf {
/**
 * Boolean enable/disable entry for the `localTableScan` operator; defaults to `false`.
 *
 * Built through `createExecEnabledConfig`, which is defined elsewhere in this object.
 * NOTE(review): the full config key is presumably derived from the operator name passed
 * here -- confirm against `createExecEnabledConfig`.
 */
val COMET_EXEC_LOCAL_TABLE_SCAN_ENABLED: ConfigEntry[Boolean] =
  createExecEnabledConfig("localTableScan", defaultValue = false)
298298
/**
 * Number of partitions (buckets) used by Grace Hash Join.
 *
 * Key: `$COMET_EXEC_CONFIG_PREFIX.graceHashJoin.numPartitions`. Must be strictly positive
 * (enforced by `checkValue`); defaults to 16.
 */
val COMET_EXEC_GRACE_HASH_JOIN_NUM_PARTITIONS: ConfigEntry[Int] =
  conf(s"$COMET_EXEC_CONFIG_PREFIX.graceHashJoin.numPartitions")
    .category(CATEGORY_EXEC)
    .doc(
      "The number of partitions (buckets) to use for Grace Hash Join. A higher number " +
        "reduces the size of each partition but increases overhead.")
    .intConf
    // Zero or negative partition counts are rejected when the value is set.
    .checkValue(v => v > 0, "The number of partitions must be positive.")
    .createWithDefault(16)
307+
/**
 * Size threshold, in bytes, for the Grace Hash Join fast path.
 *
 * Key: `$COMET_EXEC_CONFIG_PREFIX.graceHashJoin.fastPathThreshold`. Must be non-negative
 * (enforced by `checkValue`); `0` disables the fast path. Defaults to 64 MB.
 */
val COMET_EXEC_GRACE_HASH_JOIN_FAST_PATH_THRESHOLD: ConfigEntry[Long] =
  conf(s"$COMET_EXEC_CONFIG_PREFIX.graceHashJoin.fastPathThreshold")
    .category(CATEGORY_EXEC)
    .doc(
      "Per-task memory budget in bytes for Grace Hash Join fast-path hash tables. " +
        "When a build side fits in memory and is smaller than this threshold, " +
        "the join executes as a single HashJoinExec without partitioning or spilling. " +
        "Set to 0 to disable the fast path. Larger values risk OOM because HashJoinExec " +
        "creates non-spillable hash tables.")
    .longConf
    // 0 is a valid value (it turns the fast path off), hence >= rather than >.
    .checkValue(v => v >= 0, "The fast path threshold must be non-negative.")
    .createWithDefault(64L * 1024 * 1024) // 64 MB
320+
299321 val COMET_NATIVE_COLUMNAR_TO_ROW_ENABLED : ConfigEntry [Boolean ] =
300322 conf(s " $COMET_EXEC_CONFIG_PREFIX.columnarToRow.native.enabled " )
301323 .category(CATEGORY_EXEC )
@@ -383,6 +405,18 @@ object CometConf extends ShimCometConf {
383405 .booleanConf
384406 .createWithDefault(false )
385407
/**
 * Upper bound, in bytes, on the estimated build-side size for which a SortMergeJoin is
 * replaced with a ShuffledHashJoin.
 *
 * Key: `$COMET_EXEC_CONFIG_PREFIX.replaceSortMergeJoin.maxBuildSize`. `-1` is documented
 * as the sentinel that disables the size check. Defaults to 100 MB.
 *
 * NOTE(review): unlike the neighbouring entries this one has no `checkValue`, so values
 * below `-1` are accepted as-is -- confirm how the consumer treats them before relying
 * on any negative value other than `-1`.
 */
val COMET_REPLACE_SMJ_MAX_BUILD_SIZE: ConfigEntry[Long] =
  conf(s"$COMET_EXEC_CONFIG_PREFIX.replaceSortMergeJoin.maxBuildSize")
    .category(CATEGORY_EXEC)
    .doc(
      "Maximum estimated size in bytes of the build side for replacing SortMergeJoin " +
        "with ShuffledHashJoin. When the build side's logical plan statistics exceed this " +
        "threshold, the SortMergeJoin is kept because sort-merge join's streaming merge " +
        "on pre-sorted data outperforms hash join's per-task hash table construction " +
        "for large build sides. Set to -1 to disable this check and always replace.")
    .longConf
    .createWithDefault(100L * 1024 * 1024) // 100 MB
419+
386420 val COMET_EXEC_SHUFFLE_WITH_HASH_PARTITIONING_ENABLED : ConfigEntry [Boolean ] =
387421 conf(" spark.comet.native.shuffle.partitioning.hash.enabled" )
388422 .category(CATEGORY_SHUFFLE )
0 commit comments