Skip to content

Commit 25e5e66

Browse files
committed
[improvement](build) Make Hive refresh DDL idempotent and reduce noise
### What problem does this PR solve?

Issue Number: None

Related PR: None

Problem Summary: Convert the Hive bootstrap HQL to a drop-then-create style so that refresh runs are repeatable, add a non-MR load path for the employees seed data, and reduce refresh/startup log noise by gating xtrace and skipping compose up in healthy refresh mode.

### Release note

None

### Check List (For Author)

- Test: Manual test
    - Regression test / Unit Test / Manual test / No need to test (with reason)
    - Ran: `sudo bash docker/thirdparties/run-thirdparties-docker.sh -c hive3 --hive-mode refresh`
    - Ran: `sudo bash docker/thirdparties/run-thirdparties-docker.sh -c hive3 --hive-mode refresh --hive-modules preinstalled_hql`
- Behavior changed: Yes (Hive refresh now follows a stricter, idempotent drop-and-create path and has lower default startup log verbosity)
- Does this need documentation: No
1 parent 4d185bc commit 25e5e66

186 files changed

Lines changed: 900 additions & 309 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
#
1717

1818

19-
version: "3.8"
20-
2119
services:
2220
namenode:
2321
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8

docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
#
1717

1818

19-
version: "3.8"
20-
2119
services:
2220
namenode:
2321
image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
CREATE DATABASE IF NOT EXISTS hdfs_db;
22
USE hdfs_db;
3-
CREATE TABLE external_test_table(
3+
drop table if exists external_test_table;
4+
create table external_test_table(
45
a INT COMMENT 'The a field',
56
b STRING COMMENT 'The b field'
67
)
@@ -10,38 +11,43 @@ INSERT INTO external_test_table VALUES(11111111, "hdfs_db_test");
1011
SET hive.metastore.warehouse.dir=s3a://selectdb-qa-datalake-test-hk/paimon_warehouse;
1112
CREATE DATABASE IF NOT EXISTS aws_db;
1213
USE aws_db;
13-
CREATE EXTERNAL TABLE external_test_table
14+
drop table if exists external_test_table;
15+
create external table external_test_table
1416
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
1517
LOCATION 's3a://selectdb-qa-datalake-test-hk/paimon_warehouse/aws_db.db/hive_test_table';
1618

1719

1820
SET hive.metastore.warehouse.dir=oss://doris-regression-bj/regression/paimon_warehouse;
19-
CREATE DATABASE ali_db;
21+
CREATE DATABASE if not exists ali_db;
2022
USE ali_db;
21-
CREATE EXTERNAL TABLE external_test_table
23+
drop table if exists external_test_table;
24+
create external table external_test_table
2225
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
2326
LOCATION 'oss://doris-regression-bj/regression/paimon_warehouse/ali_db.db/hive_test_table';
2427

2528

2629
SET hive.metastore.warehouse.dir=obs://doris-build/regression/paimon_warehouse;
27-
CREATE DATABASE hw_db;
30+
CREATE DATABASE if not exists hw_db;
2831
USE hw_db;
29-
CREATE EXTERNAL TABLE external_test_table
32+
drop table if exists external_test_table;
33+
create external table external_test_table
3034
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
3135
LOCATION 'obs://doris-build/regression/paimon_warehouse/hw_db.db/hive_test_table';
3236

3337

3438
SET hive.metastore.warehouse.dir=cosn://sdb-qa-datalake-test-1308700295/paimon_warehouse;
35-
CREATE DATABASE tx_db;
39+
CREATE DATABASE if not exists tx_db;
3640
USE tx_db;
37-
CREATE EXTERNAL TABLE external_test_table
41+
drop table if exists external_test_table;
42+
create external table external_test_table
3843
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
3944
LOCATION 'cosn://sdb-qa-datalake-test-1308700295/paimon_warehouse/tx_db.db/hive_test_table';
4045

4146

4247
SET hive.metastore.warehouse.dir=gs://selectdb-qa-datalake-test/paimon_warehouse;
43-
CREATE DATABASE gcs_db;
48+
CREATE DATABASE if not exists gcs_db;
4449
USE gcs_db;
45-
CREATE EXTERNAL TABLE external_test_table
50+
drop table if exists external_test_table;
51+
create external table external_test_table
4652
STORED BY 'org.apache.paimon.hive.PaimonStorageHandler'
4753
LOCATION 'gs://selectdb-qa-datalake-test/paimon_warehouse/gcs_db.db/hive_test_table';

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/create_hive_orc_tables.hql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
create database if not exists multi_catalog;
22
use multi_catalog;
33

4-
CREATE TABLE complex_data_orc (
4+
drop table if exists complex_data_orc;
5+
6+
create table complex_data_orc (
57
id INT,
68
m MAP<STRING, INT>,
79
l ARRAY<STRING>

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/create_tpch1_orc.hql

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
create database tpch1_orc;
1+
create database if not exists tpch1_orc;
22
use tpch1_orc;
33

4-
CREATE TABLE `customer`(
4+
drop table if exists `customer`;
5+
6+
create table `customer`(
57
`c_custkey` int,
68
`c_name` string,
79
`c_address` string,
@@ -24,7 +26,9 @@ LOCATION
2426
TBLPROPERTIES (
2527
'transient_lastDdlTime'='1661955829');
2628

27-
CREATE TABLE `lineitem`(
29+
drop table if exists `lineitem`;
30+
31+
create table `lineitem`(
2832
`l_orderkey` int,
2933
`l_partkey` int,
3034
`l_suppkey` int,
@@ -55,7 +59,9 @@ LOCATION
5559
TBLPROPERTIES (
5660
'transient_lastDdlTime'='1661955829');
5761

58-
CREATE TABLE `nation`(
62+
drop table if exists `nation`;
63+
64+
create table `nation`(
5965
`n_nationkey` int,
6066
`n_name` string,
6167
`n_regionkey` int,
@@ -74,7 +80,9 @@ LOCATION
7480
TBLPROPERTIES (
7581
'transient_lastDdlTime'='1661955829');
7682

77-
CREATE TABLE `orders`(
83+
drop table if exists `orders`;
84+
85+
create table `orders`(
7886
`o_orderkey` int,
7987
`o_custkey` int,
8088
`o_orderstatus` string,
@@ -98,7 +106,9 @@ LOCATION
98106
TBLPROPERTIES (
99107
'transient_lastDdlTime'='1661955829');
100108

101-
CREATE TABLE `part`(
109+
drop table if exists `part`;
110+
111+
create table `part`(
102112
`p_partkey` int,
103113
`p_name` string,
104114
`p_mfgr` string,
@@ -122,7 +132,9 @@ LOCATION
122132
TBLPROPERTIES (
123133
'transient_lastDdlTime'='1661955829');
124134

125-
CREATE TABLE `partsupp`(
135+
drop table if exists `partsupp`;
136+
137+
create table `partsupp`(
126138
`ps_partkey` int,
127139
`ps_suppkey` int,
128140
`ps_availqty` int,
@@ -142,7 +154,9 @@ LOCATION
142154
TBLPROPERTIES (
143155
'transient_lastDdlTime'='1661955829');
144156

145-
CREATE TABLE `region`(
157+
drop table if exists `region`;
158+
159+
create table `region`(
146160
`r_regionkey` int,
147161
`r_name` string,
148162
`r_comment` string)
@@ -160,7 +174,9 @@ LOCATION
160174
TBLPROPERTIES (
161175
'transient_lastDdlTime'='1661955829');
162176

163-
CREATE TABLE `supplier`(
177+
drop table if exists `supplier`;
178+
179+
create table `supplier`(
164180
`s_suppkey` int,
165181
`s_name` string,
166182
`s_address` string,

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/create_tpch1_parquet.hql

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
create database tpch1_parquet;
1+
create database if not exists tpch1_parquet;
22
use tpch1_parquet;
33

4-
CREATE TABLE `customer`(
4+
drop table if exists `customer`;
5+
6+
create table `customer`(
57
`c_custkey` int,
68
`c_name` string,
79
`c_address` string,
@@ -21,7 +23,9 @@ LOCATION
2123
TBLPROPERTIES (
2224
'transient_lastDdlTime'='1661955829');
2325

24-
CREATE TABLE `lineitem`(
26+
drop table if exists `lineitem`;
27+
28+
create table `lineitem`(
2529
`l_orderkey` int,
2630
`l_partkey` int,
2731
`l_suppkey` int,
@@ -49,7 +53,9 @@ LOCATION
4953
TBLPROPERTIES (
5054
'transient_lastDdlTime'='1661955829');
5155

52-
CREATE TABLE `nation`(
56+
drop table if exists `nation`;
57+
58+
create table `nation`(
5359
`n_nationkey` int,
5460
`n_name` string,
5561
`n_regionkey` int,
@@ -65,7 +71,9 @@ LOCATION
6571
TBLPROPERTIES (
6672
'transient_lastDdlTime'='1661955829');
6773

68-
CREATE TABLE `orders`(
74+
drop table if exists `orders`;
75+
76+
create table `orders`(
6977
`o_orderkey` int,
7078
`o_custkey` int,
7179
`o_orderstatus` string,
@@ -86,7 +94,9 @@ LOCATION
8694
TBLPROPERTIES (
8795
'transient_lastDdlTime'='1661955829');
8896

89-
CREATE TABLE `part`(
97+
drop table if exists `part`;
98+
99+
create table `part`(
90100
`p_partkey` int,
91101
`p_name` string,
92102
`p_mfgr` string,
@@ -107,7 +117,9 @@ LOCATION
107117
TBLPROPERTIES (
108118
'transient_lastDdlTime'='1661955829');
109119

110-
CREATE TABLE `partsupp`(
120+
drop table if exists `partsupp`;
121+
122+
create table `partsupp`(
111123
`ps_partkey` int,
112124
`ps_suppkey` int,
113125
`ps_availqty` int,
@@ -124,7 +136,9 @@ LOCATION
124136
TBLPROPERTIES (
125137
'transient_lastDdlTime'='1661955829');
126138

127-
CREATE TABLE `region`(
139+
drop table if exists `region`;
140+
141+
create table `region`(
128142
`r_regionkey` int,
129143
`r_name` string,
130144
`r_comment` string)
@@ -139,7 +153,9 @@ LOCATION
139153
TBLPROPERTIES (
140154
'transient_lastDdlTime'='1661955829');
141155

142-
CREATE TABLE `supplier`(
156+
drop table if exists `supplier`;
157+
158+
create table `supplier`(
143159
`s_suppkey` int,
144160
`s_name` string,
145161
`s_address` string,
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
1,John Doe,IT,75000.00,2020-01-15
2+
2,Jane Smith,HR,65000.00,2019-03-20
3+
3,Bob Johnson,IT,80000.00,2021-05-10
4+
4,Alice Brown,Finance,70000.00,2020-11-30
5+
5,Charlie Wilson,HR,62000.00,2022-01-05

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run02.hql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
CREATE TABLE `partition_table`(
1+
drop table if exists `partition_table`;
2+
create table `partition_table`(
23
`l_orderkey` int,
34
`l_partkey` int,
45
`l_suppkey` int,

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run03.hql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
CREATE TABLE `delta_byte_array`(
1+
drop table if exists `delta_byte_array`;
2+
create table `delta_byte_array`(
23
`c_salutation` string,
34
`c_first_name` string,
45
`c_last_name` string,

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run04.hql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
CREATE TABLE `delta_length_byte_array`(
1+
drop table if exists `delta_length_byte_array`;
2+
create table `delta_length_byte_array`(
23
`FRUIT` string
34
)
45
ROW FORMAT SERDE

0 commit comments

Comments
 (0)