Skip to content

Commit 4d185bc

Browse files
committed
[improvement](build) Harden Hive docker startup refresh path
### What problem does this PR solve?

Issue Number: None

Related PR: None

Problem Summary: Fix Hive docker startup regressions in refresh mode by making preinstalled HQL state incremental, wiring json-serde into HiveServer2, reducing noisy Hadoop copy logs, and ensuring JuiceFS metadata initialization relies on the docker mysql dependency path.

### Release note

None

### Check List (For Author)

- Test: Manual test
    - Hive thirdparties startup and log inspection
- Behavior changed: Yes (Hive docker startup now auto-starts mysql when the default JuiceFS metadata DSN is used, and refresh mode resumes preinstalled HQL incrementally)
- Does this need documentation: No
1 parent bc64a11 commit 4d185bc

7 files changed

Lines changed: 118 additions & 15 deletions

File tree

docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,12 @@ services:
6565
environment:
6666
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://${IP_HOST}:${PG_PORT}/metastore"
6767
SERVICE_PRECONDITION: "${IP_HOST}:${HMS_PORT}"
68+
HIVE_SITE_CONF_hive_aux_jars_path: "file:///mnt/scripts/auxlib/json-serde-1.3.9-SNAPSHOT-jar-with-dependencies.jar"
6869
container_name: ${CONTAINER_UID}hive2-server
6970
expose:
7071
- "${HS_PORT}"
72+
volumes:
73+
- ./scripts:/mnt/scripts
7174
depends_on:
7275
datanode:
7376
condition: service_healthy

docker/thirdparties/docker-compose/hive/hive-2x_settings.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export HS_PORT=10000 # should be same as hive2ServerPort in regression-conf.groo
2626
export PG_PORT=5432 # should be same as hive2PgPort in regression-conf.groovy
2727

2828
# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
29-
# CI can override this env, e.g.:
29+
# CI can override this env, e.g. to point at the docker-published mysql_57 port:
3030
# export JFS_CLUSTER_META="mysql://user:pwd@(127.0.0.1:3316)/juicefs_meta"
3131
# Defaults to mysql_57 (port 3316) because the external pipeline always starts mysql but not redis.
3232
export JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"

docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,12 @@ services:
6767
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://${IP_HOST}:${PG_PORT}/metastore"
6868
SERVICE_PRECONDITION: "${IP_HOST}:${HMS_PORT}"
6969
JVM_OPTS: -Xmx2g
70+
HIVE_SITE_CONF_hive_aux_jars_path: "file:///mnt/scripts/auxlib/json-serde-1.3.9-SNAPSHOT-jar-with-dependencies.jar"
7071
container_name: ${CONTAINER_UID}hive3-server
7172
expose:
7273
- "${HS_PORT}"
74+
volumes:
75+
- ./scripts:/mnt/scripts
7376
depends_on:
7477
datanode:
7578
condition: service_healthy

docker/thirdparties/docker-compose/hive/hive-3x_settings.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export HS_PORT=13000 # should be same as hive3ServerPort in regression-conf.groo
2626
export PG_PORT=5732 # should be same as hive3PgPort in regression-conf.groovy
2727

2828
# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
29-
# CI can override this env, e.g.:
29+
# CI can override this env, e.g. to point at the docker-published mysql_57 port:
3030
# export JFS_CLUSTER_META="mysql://user:pwd@(127.0.0.1:3316)/juicefs_meta"
3131
export JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
3232

docker/thirdparties/docker-compose/hive/scripts/bin/hadoop

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,30 @@ resolve_real_hadoop() {
4444

4545
REAL_HADOOP="$(resolve_real_hadoop)"
4646

47+
# Replace the current process with the real hadoop binary.
# HADOOP_ROOT_LOGGER is passed through when the caller already set it to a
# non-empty value; otherwise it defaults to "WARN,console".
# Arguments:
#   $@ - forwarded verbatim to ${REAL_HADOOP}
# Globals:
#   REAL_HADOOP (read), HADOOP_ROOT_LOGGER (read, exported to the child)
# Returns:
#   Does not return on success; the process image is replaced by exec.
exec_quiet_hadoop_fs() {
    export HADOOP_ROOT_LOGGER="${HADOOP_ROOT_LOGGER:-WARN,console}"
    exec "${REAL_HADOOP}" "$@"
}
50+
4751
if [[ "$#" -ge 2 && "$1" == "fs" && "$2" == "-put" ]]; then
4852
shift 2
4953
case "${1:-}" in
5054
-f|-p|-l)
51-
exec "${REAL_HADOOP}" fs -put "$@"
55+
exec_quiet_hadoop_fs fs -put "$@"
56+
;;
57+
*)
58+
exec_quiet_hadoop_fs fs -put -f "$@"
59+
;;
60+
esac
61+
fi
62+
63+
if [[ "$#" -ge 2 && "$1" == "fs" && "$2" == "-copyFromLocal" ]]; then
64+
shift 2
65+
case "${1:-}" in
66+
-f|-p|-l)
67+
exec_quiet_hadoop_fs fs -copyFromLocal "$@"
5268
;;
5369
*)
54-
exec "${REAL_HADOOP}" fs -put -f "$@"
70+
exec_quiet_hadoop_fs fs -copyFromLocal -f "$@"
5571
;;
5672
esac
5773
fi

docker/thirdparties/docker-compose/hive/scripts/hive-module-lib.sh

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ module_state_file() {
6464
echo "${HIVE_STATE_DIR}/modules/${module}.sha"
6565
}
6666

67+
# Print the path of the state file that records the checksum of one
# preinstalled HQL script.
# Arguments:
#   $1 - script path relative to /mnt/scripts; every "/" is replaced by "__"
#        so the result is a single file name under the modules directory.
# Globals:
#   HIVE_STATE_DIR (read)
# Outputs:
#   ${HIVE_STATE_DIR}/modules/preinstalled_hql__<flattened path>.sha on stdout
preinstalled_hql_state_file() {
    local script_rel_path="$1"
    printf '%s\n' "${HIVE_STATE_DIR}/modules/preinstalled_hql__${script_rel_path//\//__}.sha"
}
72+
6773
baseline_version_file() {
6874
echo "${HIVE_STATE_DIR}/baseline.version"
6975
}
@@ -122,10 +128,43 @@ calc_module_sha() {
122128
hash_files "${files[@]}"
123129
}
124130

131+
# Compute the checksum of a single preinstalled HQL script by delegating to
# hash_files with that one path.
# Arguments:
#   $1 - path of the HQL file
# Outputs:
#   whatever hash_files prints for the file
calc_preinstalled_hql_sha() {
    hash_files "$1"
}
135+
125136
module_needs_refresh() {
126137
local module="$1"
127138
local current_sha
128139
local recorded_sha_file
140+
local hql_path=""
141+
local relative_hql_path=""
142+
local current_file_sha=""
143+
local recorded_file_sha=""
144+
145+
if [[ "${module}" == "preinstalled_hql" ]]; then
146+
shopt -s nullglob
147+
for hql_path in /mnt/scripts/create_preinstalled_scripts/*.hql; do
148+
relative_hql_path="${hql_path#/mnt/scripts/}"
149+
if ! bootstrap_item_selected "${BOOTSTRAP_GROUPS}" "preinstalled_hql" "${relative_hql_path}"; then
150+
continue
151+
fi
152+
153+
current_file_sha="$(calc_preinstalled_hql_sha "${hql_path}")"
154+
recorded_sha_file="$(preinstalled_hql_state_file "${relative_hql_path}")"
155+
if [[ ! -f "${recorded_sha_file}" ]]; then
156+
shopt -u nullglob
157+
return 0
158+
fi
159+
recorded_file_sha="$(cat "${recorded_sha_file}")"
160+
if [[ "${recorded_file_sha}" != "${current_file_sha}" ]]; then
161+
shopt -u nullglob
162+
return 0
163+
fi
164+
done
165+
shopt -u nullglob
166+
return 1
167+
fi
129168

130169
current_sha="$(calc_module_sha "${module}")"
131170
recorded_sha_file="$(module_state_file "${module}")"
@@ -189,7 +228,9 @@ refresh_preinstalled_hql_module() {
189228
local preinstalled_hqls=()
190229
local hql_path=""
191230
local relative_hql_path=""
192-
local merged_preinstalled_hql="/tmp/merged-preinstalled.hql"
231+
local current_sha=""
232+
local state_file=""
233+
local refreshed=0
193234

194235
shopt -s nullglob
195236
for hql_path in /mnt/scripts/create_preinstalled_scripts/*.hql; do
@@ -203,8 +244,26 @@ refresh_preinstalled_hql_module() {
203244
if (( ${#preinstalled_hqls[@]} > 0 )); then
204245
IFS=$'\n' preinstalled_hqls=($(printf '%s\n' "${preinstalled_hqls[@]}" | sort))
205246
unset IFS
206-
bash /mnt/scripts/merge-preinstalled-hql.sh "${merged_preinstalled_hql}" "${preinstalled_hqls[@]}"
207-
run_hive_hql "${merged_preinstalled_hql}" "Merged preinstalled HQLs"
247+
for hql_path in "${preinstalled_hqls[@]}"; do
248+
relative_hql_path="${hql_path#/mnt/scripts/}"
249+
current_sha="$(calc_preinstalled_hql_sha "${hql_path}")"
250+
state_file="$(preinstalled_hql_state_file "${relative_hql_path}")"
251+
252+
if [[ -f "${state_file}" ]] && grep -Fxq "${current_sha}" "${state_file}"; then
253+
echo " [preinstalled_hql] up-to-date ${relative_hql_path}"
254+
continue
255+
fi
256+
257+
echo " [preinstalled_hql] BEGIN ${relative_hql_path}"
258+
run_hive_hql "${hql_path}" "${relative_hql_path}"
259+
printf '%s\n' "${current_sha}" >"${state_file}"
260+
echo " [preinstalled_hql] END ${relative_hql_path}"
261+
refreshed=1
262+
done
263+
264+
if (( refreshed == 0 )); then
265+
echo " [preinstalled_hql] all selected HQL files are up-to-date"
266+
fi
208267
fi
209268
}
210269

@@ -220,6 +279,7 @@ refresh_module() {
220279
;;
221280
preinstalled_hql)
222281
refresh_preinstalled_hql_module
282+
return 0
223283
;;
224284
view)
225285
run_hive_hql /mnt/scripts/create_view_scripts/create_view.hql "create_view.hql"

docker/thirdparties/run-thirdparties-docker.sh

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,25 @@ if [[ "${RUN_HIVE3}" -eq 1 ]] && [[ -z "${HIVE3_BOOTSTRAP_GROUPS+x}" ]]; then
260260
export HIVE3_BOOTSTRAP_GROUPS="common,hive3_only"
261261
fi
262262

263+
# Decide whether a Hive component needs the docker mysql_57 component for its
# JuiceFS metadata.
# Arguments:
#   $1 - Hive component name ("hive2" or "hive3"); the leading "hive" is
#        stripped to select hive-<N>x_settings.env.
# Globals:
#   ROOT (read) - directory that contains docker-compose/hive/
#   JFS_CLUSTER_META (read) - honoured as an override by the settings file
# Returns:
#   0 when the effective JFS_CLUSTER_META is a mysql:// DSN that targets
#   127.0.0.1:3316 or localhost:3316; 1 otherwise, including when the settings
#   file does not exist.
hive_requires_mysql_component() {
    local hive_version="$1"
    local jfs_meta=""
    local settings_env="${ROOT}/docker-compose/hive/hive-${hive_version#hive}x_settings.env"

    # Without a settings file there is no DSN to inspect.
    [[ -f "${settings_env}" ]] || return 1

    # Source the settings inside a command-substitution subshell so that its
    # exports (ports, JFS_CLUSTER_META, ...) do not leak into the caller.
    # Otherwise the value exported while probing one Hive version would
    # pre-empt the `${JFS_CLUSTER_META:-...}` default of the next one.
    # shellcheck disable=SC1090
    jfs_meta="$(
        . "${settings_env}" >/dev/null
        printf '%s' "${JFS_CLUSTER_META:-}"
    )"

    [[ "${jfs_meta}" == mysql://* ]] || return 1
    # The quoted pattern parts are matched literally, so "@(" is not an extglob.
    [[ "${jfs_meta}" == *"@(127.0.0.1:3316)/"* || "${jfs_meta}" == *"@(localhost:3316)/"* ]]
}
274+
275+
# Enable the mysql component when a selected Hive version needs it, as
# decided by hive_requires_mysql_component. hive2 is probed before hive3.
if [[ "${RUN_HIVE2}" -eq 1 ]]; then
    if hive_requires_mysql_component "hive2"; then
        RUN_MYSQL=1
    fi
fi
if [[ "${RUN_HIVE3}" -eq 1 ]]; then
    if hive_requires_mysql_component "hive3"; then
        RUN_MYSQL=1
    fi
fi
281+
263282
reserve_ports() {
264283
if [[ "${NEED_RESERVE_PORTS}" -eq 0 ]]; then
265284
return
@@ -366,16 +385,18 @@ ensure_juicefs_meta_database() {
366385
meta_db="${jfs_meta##*/}"
367386
meta_db="${meta_db%%\?*}"
368387

369-
if command -v mysql >/dev/null 2>&1; then
370-
mysql -h127.0.0.1 -P3316 -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS \`${meta_db}\`;"
371-
return 0
372-
fi
373-
374388
mysql_container=$(sudo docker ps --format '{{.Names}}' | grep -E "(^|-)${CONTAINER_UID}mysql_57(-[0-9]+)?$" | head -n 1 || true)
375389
if [[ -n "${mysql_container}" ]]; then
376-
sudo docker exec "${mysql_container}" \
377-
mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS \`${meta_db}\`;"
390+
if sudo docker exec "${mysql_container}" \
391+
mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS \`${meta_db}\`;" >/dev/null 2>&1; then
392+
return 0
393+
fi
394+
echo "WARN: docker mysql ${mysql_container} is unavailable for JuiceFS metadata init." >&2
395+
return 0
378396
fi
397+
398+
echo "WARN: docker mysql_57 is not running; skip eager JuiceFS metadata database creation for ${meta_db}." >&2
399+
return 0
379400
}
380401

381402
run_juicefs_cli() {
@@ -423,7 +444,7 @@ prepare_juicefs_meta_for_hive() {
423444
sudo mkdir -p "${bucket_dir}"
424445
sudo chmod 777 "${bucket_dir}"
425446

426-
# For local mysql_57 metadata DSN, ensure metadata database exists.
447+
# For the default docker mysql_57 metadata DSN, ensure metadata database exists.
427448
ensure_juicefs_meta_database "${jfs_meta}"
428449

429450
if run_juicefs_cli status "${jfs_meta}" >/dev/null 2>&1; then

0 commit comments

Comments
 (0)