Skip to content

Commit ee5da51

Browse files
committed
Throw exception if not get the pre-built hash table
1 parent 9b268b5 commit ee5da51

2 files changed

Lines changed: 24 additions & 13 deletions

File tree

cpp/velox/config/VeloxConfig.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ const std::string kVeloxSplitPreloadPerDriver = "spark.gluten.sql.columnar.backe
7979
const std::string kHashProbeDynamicFilterPushdownEnabled =
8080
"spark.gluten.sql.columnar.backend.velox.hashProbe.dynamicFilterPushdown.enabled";
8181

82+
const std::string kHashTableBuildOncePerExecutor = "spark.gluten.velox.buildHashTableOncePerExecutor.enabled";
83+
const bool kHashTableBuildOncePerExecutorDefault = true;
84+
8285
const std::string kHashProbeBloomFilterPushdownMaxSize =
8386
"spark.gluten.sql.columnar.backend.velox.hashProbe.bloomFilterPushdown.maxSize";
8487

cpp/velox/substrait/SubstraitToVeloxPlan.cc

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -399,24 +399,32 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
399399
} else if (
400400
sJoin.has_advanced_extension() &&
401401
SubstraitParser::configSetInOptimization(sJoin.advanced_extension(), "isBHJ=")) {
402+
bool hashTableBuildOncePerExecutorEnabled =
403+
veloxCfg_->get<bool>(kHashTableBuildOncePerExecutor, kHashTableBuildOncePerExecutorDefault);
404+
402405
std::string hashTableId = sJoin.hashtableid();
403406

404407
std::shared_ptr<core::OpaqueHashTable> opaqueSharedHashTable = nullptr;
405408
bool joinHasNullKeys = false;
406409

407-
try {
408-
auto hashTableBuilder = ObjectStore::retrieve<gluten::HashTableBuilder>(getJoin(hashTableId));
409-
joinHasNullKeys = hashTableBuilder->joinHasNullKeys();
410-
auto originalShared = hashTableBuilder->hashTable();
411-
opaqueSharedHashTable = std::shared_ptr<core::OpaqueHashTable>(
412-
originalShared, reinterpret_cast<core::OpaqueHashTable*>(originalShared.get()));
413-
414-
LOG(INFO) << "Successfully retrieved and aliased HashTable for reuse. ID: " << hashTableId;
415-
} catch (const std::exception& e) {
416-
LOG(WARNING)
417-
<< "Error retrieving HashTable from ObjectStore: " << e.what()
418-
<< ". Falling back to building new table. To ensure correct results, please verify that spark.gluten.velox.buildHashTableOncePerExecutor.enabled is set to false.";
419-
opaqueSharedHashTable = nullptr;
410+
if (hashTableBuildOncePerExecutorEnabled) {
411+
std::cout << "the hashTableBuildOncePerExecutorEnabled is set" << "\n";
412+
try {
413+
auto hashTableBuilder = ObjectStore::retrieve<gluten::HashTableBuilder>(getJoin(hashTableId));
414+
joinHasNullKeys = hashTableBuilder->joinHasNullKeys();
415+
auto originalShared = hashTableBuilder->hashTable();
416+
opaqueSharedHashTable = std::shared_ptr<core::OpaqueHashTable>(
417+
originalShared, reinterpret_cast<core::OpaqueHashTable*>(originalShared.get()));
418+
419+
LOG(INFO) << "Successfully retrieved and aliased HashTable for reuse. ID: " << hashTableId;
420+
} catch (const std::exception& e) {
421+
throw GlutenException(
422+
"Error retrieving HashTable from ObjectStore: " + std::string(e.what()) +
423+
" You can set spark.gluten.velox.buildHashTableOncePerExecutor.enabled"
424+
" to false as workaround.");
425+
}
426+
} else {
427+
std::cout << "the hashTableBuildOncePerExecutorEnabled is false" << "\n";
420428
}
421429

422430
// Create HashJoinNode node

0 commit comments

Comments
 (0)