Skip to content

Commit aca9035

Browse files
committed
[improvement](build) Optimize Hive thirdparty startup and refresh workflow
### What problem does this PR solve? Related Issue: #62101 Related PR: #62102 Problem Summary: This PR consolidates Hive thirdparty startup improvements to make refresh/rebuild behavior more predictable, reduce startup overhead, and improve operational observability. Key updates include: - introduce structured hive startup modes () and module-selective refresh () - persist and reuse hive state, with SHA-based incremental refresh for modules and preinstalled HQL files - reduce refresh/startup log noise (xtrace gating, obsolete compose version cleanup, cleaner refresh stage logs) - make Hive bootstrap scripts/HQL idempotent with drop-then-create style and repeatable reruns - optimize healthy refresh path by skipping unnecessary compose-up steps - switch JuiceFS default metadata backend for Hive to metastore PostgreSQL and remove auto-MySQL dependency - add Hive README documenting component segmentation, startup modes, module refresh, and troubleshooting ### Release note None ### Check List (For Author) - Test: Manual test - Ran hive3 refresh and module-scoped refresh via run-thirdparties-docker.sh - Behavior changed: Yes (Hive startup and refresh behavior is now mode/module driven and defaults to PostgreSQL-backed JuiceFS metadata) - Does this need documentation: No
1 parent 3160231 commit aca9035

208 files changed

Lines changed: 2779 additions & 1513 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

be/src/exec/operator/hashjoin_build_sink.cpp

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -237,18 +237,7 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu
237237

238238
if (p._use_shared_hash_table) {
239239
std::unique_lock lock(p._mutex);
240-
// Only signal non-builder tasks when the builder actually built the hash table.
241-
// When the builder is terminated (woken up early because the probe side finished
242-
// first), it never called process_build_block() so the hash table variant is still
243-
// monostate. Setting _signaled=true in that case would cause non-builder tasks to
244-
// enter std::visit on monostate and crash with "Hash table type mismatch".
245-
//
246-
// _terminated is reliably true here when the task was woken up early, because
247-
// operator terminate() is called from the execute() Defer in PipelineTask
248-
// before close() is invoked.
249-
if (!_terminated) {
250-
p._signaled = true;
251-
}
240+
p._signaled = true;
252241
for (auto& dep : _shared_state->sink_deps) {
253242
dep->set_ready();
254243
}
@@ -851,20 +840,12 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, Block* in_block, bo
851840
RETURN_IF_ERROR(local_state.build_asof_index(*local_state._shared_state->build_block));
852841
local_state.init_short_circuit_for_probe();
853842
} else if (!local_state._should_build_hash_table) {
854-
// The non-builder instance waits for the builder (task 0) to finish building the hash table.
855-
// If _signaled is false, either the builder hasn't finished yet, or the builder was
856-
// terminated (woken up early) without building the hash table — in both cases, return EOF.
857-
//
858-
// The close() Defer in the builder conditionally sets _signaled=true ONLY when the builder
859-
// was NOT terminated (i.e., the hash table was actually built). When the builder is
860-
// terminated, _signaled stays false, so non-builders always hit this guard and return EOF
861-
// safely — never reaching the std::visit on an uninitialized (monostate) hash table.
862-
//
863-
// At this point, termination is reflected solely through the value of _signaled: a
864-
// terminated builder never sets _signaled to true. Checking !_signaled is therefore
865-
// sufficient and serves as the real guard against racing with an uninitialized hash table.
866-
if (!_signaled) {
867-
return Status::Error<ErrorCode::END_OF_FILE>("source has closed");
843+
// the instance which is not build hash table, it's should wait the signal of hash table build finished.
844+
// but if it's running and signaled == false, maybe the source operator have closed caused by some short circuit
845+
// return eof will make task marked as wake_up_early
846+
// todo: remove signaled after we can guarantee that wake up eraly is always set accurately
847+
if (!_signaled || local_state._terminated) {
848+
return Status::Error<ErrorCode::END_OF_FILE>("source have closed");
868849
}
869850

870851
DCHECK_LE(local_state._task_idx,

be/src/exec/pipeline/pipeline_task.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,20 +1063,12 @@ Status PipelineTask::_state_transition(State new_state) {
10631063
}
10641064
_task_profile->add_info_string("TaskState", _to_string(new_state));
10651065
_task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");
1066-
const auto& table =
1067-
_wake_up_early ? WAKE_UP_EARLY_LEGAL_STATE_TRANSITION : LEGAL_STATE_TRANSITION;
1068-
if (!table[(int)new_state].contains(_exec_state)) {
1066+
if (!LEGAL_STATE_TRANSITION[(int)new_state].contains(_exec_state)) {
10691067
return Status::InternalError(
10701068
"Task state transition from {} to {} is not allowed! Task info: {}",
10711069
_to_string(_exec_state), _to_string(new_state), debug_string());
10721070
}
1073-
// FINISHED/FINALIZED → RUNNABLE is legal under wake_up_early (delayed wake_up() arriving
1074-
// after the task already terminated), but we must not actually move the state backwards.
1075-
bool need_move = !((_exec_state == State::FINISHED || _exec_state == State::FINALIZED) &&
1076-
new_state == State::RUNNABLE);
1077-
if (need_move) {
1078-
_exec_state = new_state;
1079-
}
1071+
_exec_state = new_state;
10801072
return Status::OK();
10811073
}
10821074

be/src/exec/pipeline/pipeline_task.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -279,19 +279,11 @@ class PipelineTask : public std::enable_shared_from_this<PipelineTask> {
279279
std::shared_ptr<MemTrackerLimiter> _query_mem_tracker;
280280

281281
/**
282-
* Normal state machine:
283282
*
284283
* INITED -----> RUNNABLE -------------------------+----> FINISHED ---+---> FINALIZED
285284
* ^ | |
286285
* | | |
287286
* +----------- BLOCKED <--------+------------------+
288-
*
289-
* When _wake_up_early is set by make_all_runnable(), additional transitions are allowed:
290-
* BLOCKED → FINISHED : task skips RUNNABLE, terminates directly
291-
* FINISHED → RUNNABLE : delayed wake_up() arrives after task already finished,
292-
* legal but no-op (state stays FINISHED)
293-
* FINALIZED → RUNNABLE : same as above but task already finalized,
294-
* legal but no-op (state stays FINALIZED)
295287
*/
296288
enum class State : int {
297289
INITED,
@@ -307,15 +299,6 @@ class PipelineTask : public std::enable_shared_from_this<PipelineTask> {
307299
{State::RUNNABLE}, // Target state is FINISHED
308300
{State::INITED, State::FINISHED}}; // Target state is FINALIZED
309301

310-
// Extended table used when _wake_up_early is true.
311-
const std::vector<std::set<State>> WAKE_UP_EARLY_LEGAL_STATE_TRANSITION = {
312-
{}, // INITED
313-
{State::INITED, State::RUNNABLE, State::BLOCKED, State::FINISHED,
314-
State::FINALIZED}, // RUNNABLE (+ FINISHED, FINALIZED)
315-
{State::RUNNABLE, State::FINISHED}, // BLOCKED
316-
{State::RUNNABLE, State::BLOCKED}, // FINISHED (+ BLOCKED)
317-
{State::INITED, State::FINISHED}}; // FINALIZED
318-
319302
std::string _to_string(State state) const {
320303
switch (state) {
321304
case State::INITED:

0 commit comments

Comments
 (0)