Skip to content

Commit 7b7c75a

Browse files
committed
[improvement](build) Remove Hive JuiceFS hard dependency on MySQL
### What problem does this PR solve?

Issue Number: None

Related PR: None

Problem Summary: Switch the Hive JuiceFS metadata default to the Hive metastore PostgreSQL, add a PostgreSQL metadata database bootstrap to the startup script, and remove the logic that auto-enables MySQL when starting hive2/hive3.

### Release note

None

### Check List (For Author)

- Test: Manual test
- Regression test / Unit Test / Manual test / No need to test (with reason)
- Ran: `sudo bash docker/thirdparties/run-thirdparties-docker.sh -c hive3 --hive-mode refresh --hive-modules preinstalled_hql`
- Behavior changed: Yes (hive startup no longer auto-requires mysql for JuiceFS metadata by default)
- Does this need documentation: No
1 parent 25e5e66 commit 7b7c75a

File tree

3 files changed

+62
-34
lines changed

3 files changed

+62
-34
lines changed

docker/thirdparties/docker-compose/hive/hive-2x_settings.env

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ export PG_PORT=5432 # should be same as hive2PgPort in regression-conf.groovy
2828
# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
2929
# CI can override this env, e.g. to point at the docker-published mysql_57 port:
3030
# export JFS_CLUSTER_META="mysql://user:pwd@(127.0.0.1:3316)/juicefs_meta"
31-
# default to mysql_57 (3316) because external pipeline always starts mysql, but not redis.
32-
export JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
31+
# Default to the Hive metastore PostgreSQL instance to avoid an external MySQL dependency.
32+
export JFS_CLUSTER_META="${JFS_CLUSTER_META:-postgres://postgres@127.0.0.1:${PG_PORT}/juicefs_meta?sslmode=disable}"

docker/thirdparties/docker-compose/hive/hive-3x_settings.env

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@ export HS_PORT=13000 # should be same as hive3ServerPort in regression-conf.groo
2626
export PG_PORT=5732 # should be same as hive3PgPort in regression-conf.groovy
2727

2828
# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
29-
# CI can override this env, e.g. to point at the docker-published mysql_57 port:
29+
# Default to the Hive metastore PostgreSQL instance to avoid an external MySQL dependency.
30+
# CI can still override this env, e.g.:
3031
# export JFS_CLUSTER_META="mysql://user:pwd@(127.0.0.1:3316)/juicefs_meta"
31-
export JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
32+
export JFS_CLUSTER_META="${JFS_CLUSTER_META:-postgres://postgres@127.0.0.1:${PG_PORT}/juicefs_meta?sslmode=disable}"
3233

3334
# Prepare for the Paimon HMS test: controls whether Paimon HMS data is loaded.
3435
export enablePaimonHms="false"

docker/thirdparties/run-thirdparties-docker.sh

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -260,25 +260,6 @@ if [[ "${RUN_HIVE3}" -eq 1 ]] && [[ -z "${HIVE3_BOOTSTRAP_GROUPS+x}" ]]; then
260260
export HIVE3_BOOTSTRAP_GROUPS="common,hive3_only"
261261
fi
262262

263-
hive_requires_mysql_component() {
264-
local hive_version="$1"
265-
local jfs_meta=""
266-
local settings_env="${ROOT}/docker-compose/hive/hive-${hive_version#hive}x_settings.env"
267-
268-
# shellcheck disable=SC1090
269-
. "${settings_env}"
270-
jfs_meta="${JFS_CLUSTER_META:-}"
271-
[[ "${jfs_meta}" == mysql://* ]] || return 1
272-
[[ "${jfs_meta}" == *"@(127.0.0.1:3316)/"* || "${jfs_meta}" == *"@(localhost:3316)/"* ]]
273-
}
274-
275-
if [[ "${RUN_HIVE2}" -eq 1 ]] && hive_requires_mysql_component "hive2"; then
276-
RUN_MYSQL=1
277-
fi
278-
if [[ "${RUN_HIVE3}" -eq 1 ]] && hive_requires_mysql_component "hive3"; then
279-
RUN_MYSQL=1
280-
fi
281-
282263
reserve_ports() {
283264
if [[ "${NEED_RESERVE_PORTS}" -eq 0 ]]; then
284265
return
@@ -377,25 +358,68 @@ ensure_juicefs_meta_database() {
377358
local jfs_meta="$1"
378359
local meta_db
379360
local mysql_container
361+
local pg_container
362+
local -a pg_candidates
380363

381-
if [[ "${jfs_meta}" != *"@(127.0.0.1:3316)/"* && "${jfs_meta}" != *"@(localhost:3316)/"* ]]; then
364+
meta_db="${jfs_meta##*/}"
365+
meta_db="${meta_db%%\?*}"
366+
if [[ ! "${meta_db}" =~ ^[A-Za-z0-9_]+$ ]]; then
367+
echo "WARN: skip JuiceFS metadata database creation for unsafe database name '${meta_db}'." >&2
382368
return 0
383369
fi
384370

385-
meta_db="${jfs_meta##*/}"
386-
meta_db="${meta_db%%\?*}"
371+
if [[ "${jfs_meta}" == mysql://* ]]; then
372+
if [[ "${jfs_meta}" != *"@(127.0.0.1:3316)/"* && "${jfs_meta}" != *"@(localhost:3316)/"* ]]; then
373+
return 0
374+
fi
387375

388-
mysql_container=$(sudo docker ps --format '{{.Names}}' | grep -E "(^|-)${CONTAINER_UID}mysql_57(-[0-9]+)?$" | head -n 1 || true)
389-
if [[ -n "${mysql_container}" ]]; then
390-
if sudo docker exec "${mysql_container}" \
391-
mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS \`${meta_db}\`;" >/dev/null 2>&1; then
376+
mysql_container=$(sudo docker ps --format '{{.Names}}' | grep -E "(^|-)${CONTAINER_UID}mysql_57(-[0-9]+)?$" | head -n 1 || true)
377+
if [[ -n "${mysql_container}" ]]; then
378+
if sudo docker exec "${mysql_container}" \
379+
mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS \`${meta_db}\`;" >/dev/null 2>&1; then
380+
return 0
381+
fi
382+
echo "WARN: docker mysql ${mysql_container} is unavailable for JuiceFS metadata init." >&2
392383
return 0
393384
fi
394-
echo "WARN: docker mysql ${mysql_container} is unavailable for JuiceFS metadata init." >&2
385+
386+
echo "WARN: docker mysql_57 is not running; skip eager JuiceFS metadata database creation for ${meta_db}." >&2
387+
return 0
388+
fi
389+
390+
if [[ "${jfs_meta}" == postgres://* || "${jfs_meta}" == postgresql://* ]]; then
391+
if [[ "${jfs_meta}" != *"@127.0.0.1:"* && "${jfs_meta}" != *"@localhost:"* ]]; then
392+
return 0
393+
fi
394+
395+
pg_candidates=(
396+
"${CONTAINER_UID}hive3-metastore-postgresql"
397+
"${CONTAINER_UID}hive2-metastore-postgresql"
398+
)
399+
400+
for pg_container in "${pg_candidates[@]}"; do
401+
if ! sudo docker ps --format '{{.Names}}' | grep -Fxq "${pg_container}"; then
402+
continue
403+
fi
404+
405+
if sudo docker exec "${pg_container}" \
406+
psql -U postgres -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname='${meta_db}'" | grep -q '^1$'; then
407+
return 0
408+
fi
409+
410+
if sudo docker exec "${pg_container}" \
411+
psql -U postgres -d postgres -c "CREATE DATABASE \"${meta_db}\";" >/dev/null 2>&1; then
412+
return 0
413+
fi
414+
415+
echo "WARN: docker postgres ${pg_container} is unavailable for JuiceFS metadata init." >&2
416+
return 0
417+
done
418+
419+
echo "WARN: hive metastore postgresql is not running; skip eager JuiceFS metadata database creation for ${meta_db}." >&2
395420
return 0
396421
fi
397422

398-
echo "WARN: docker mysql_57 is not running; skip eager JuiceFS metadata database creation for ${meta_db}." >&2
399423
return 0
400424
}
401425

@@ -433,7 +457,10 @@ ensure_juicefs_hadoop_jar_for_hive() {
433457
prepare_juicefs_meta_for_hive() {
434458
local jfs_meta="$1"
435459
local jfs_cluster_name="${2:-cluster}"
436-
if [[ -z "${jfs_meta}" || "${jfs_meta}" != mysql://* ]]; then
460+
if [[ -z "${jfs_meta}" ]]; then
461+
return 0
462+
fi
463+
if [[ "${jfs_meta}" != mysql://* && "${jfs_meta}" != postgres://* && "${jfs_meta}" != postgresql://* ]]; then
437464
return 0
438465
fi
439466
if [[ "${JFS_META_FORMATTED}" -eq 1 ]]; then
@@ -444,7 +471,7 @@ prepare_juicefs_meta_for_hive() {
444471
sudo mkdir -p "${bucket_dir}"
445472
sudo chmod 777 "${bucket_dir}"
446473

447-
# For the default docker mysql_57 metadata DSN, ensure metadata database exists.
474+
# For local docker metadata DSNs (mysql/postgresql), ensure metadata database exists.
448475
ensure_juicefs_meta_database "${jfs_meta}"
449476

450477
if run_juicefs_cli status "${jfs_meta}" >/dev/null 2>&1; then

0 commit comments

Comments
 (0)