Skip to content

Commit 4361e55

Browse files
Merge pull request #908 from ClickHouse/add-greengage-warehousepg-opengpdb
Add Greengage, WarehousePG, and OpenGPDB
2 parents fc9ac48 + da29c43 commit 4361e55

39 files changed

Lines changed: 1554 additions & 0 deletions

greengage/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Greengage DB is an open-source MPP database based on Greenplum,
2+
maintained by the GreengageDB community after Greenplum went closed
3+
source in 2024. https://github.com/GreengageDB/greengage
4+
5+
The benchmark runs Greengage inside a privileged Ubuntu 22.04 docker
6+
container, so the host can be any distro with docker. The install
7+
script fetches the upstream `greengage6.deb` (no source build) and
8+
initializes a single-host, 14-segment cluster via gpinitsystem.
9+
10+
The other scripts (start/stop/check/load/query) `docker exec` into the
11+
running container.
12+
13+
To run the test:
14+
15+
```
16+
./benchmark.sh
17+
```

greengage/benchmark.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/bash
# Thin shim — actual flow is in lib/benchmark-common.sh.
#
# The shared driver is referenced by a path relative to this directory, so
# change into the directory that holds this script before doing anything
# else. Invoking the shim as ./benchmark.sh behaves exactly as before; this
# only makes it work when it is launched from another working directory.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

# Exported so that the exec'd driver inherits them.
export BENCH_DOWNLOAD_SCRIPT="download-hits-tsv"
export BENCH_DURABLE=yes

# Replace this shell with the shared driver.
exec ../lib/benchmark-common.sh

greengage/check

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Liveness probe: runs a trivial query through psql inside the container.
# The script's exit status is the exit status of the docker exec command.
set -e

container=clickbench-greengage

# Command executed by gpadmin's login shell inside the container.
probe='psql -d postgres -t -c "SELECT 1"'

# The query output is discarded; only success or failure matters.
sudo docker exec "$container" sudo -iu gpadmin bash -lc "$probe" >/dev/null

greengage/create.sql

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
-- Benchmark fact table. Any existing copy is dropped first so the script
-- can be re-run.
DROP TABLE IF EXISTS hits;

CREATE TABLE hits
(
    WatchID                BIGINT        NOT NULL,
    JavaEnable             SMALLINT      NOT NULL,
    Title                  TEXT          NOT NULL,
    GoodEvent              SMALLINT      NOT NULL,
    EventTime              TIMESTAMP     NOT NULL,
    EventDate              DATE          NOT NULL,
    CounterID              INTEGER       NOT NULL,
    ClientIP               INTEGER       NOT NULL,
    RegionID               INTEGER       NOT NULL,
    UserID                 BIGINT        NOT NULL,
    CounterClass           SMALLINT      NOT NULL,
    OS                     SMALLINT      NOT NULL,
    UserAgent              SMALLINT      NOT NULL,
    URL                    TEXT          NOT NULL,
    Referer                TEXT          NOT NULL,
    IsRefresh              SMALLINT      NOT NULL,
    RefererCategoryID      SMALLINT      NOT NULL,
    RefererRegionID        INTEGER       NOT NULL,
    URLCategoryID          SMALLINT      NOT NULL,
    URLRegionID            INTEGER       NOT NULL,
    ResolutionWidth        SMALLINT      NOT NULL,
    ResolutionHeight       SMALLINT      NOT NULL,
    ResolutionDepth        SMALLINT      NOT NULL,
    FlashMajor             SMALLINT      NOT NULL,
    FlashMinor             SMALLINT      NOT NULL,
    FlashMinor2            TEXT          NOT NULL,
    NetMajor               SMALLINT      NOT NULL,
    NetMinor               SMALLINT      NOT NULL,
    UserAgentMajor         SMALLINT      NOT NULL,
    UserAgentMinor         VARCHAR(255)  NOT NULL,
    CookieEnable           SMALLINT      NOT NULL,
    JavascriptEnable       SMALLINT      NOT NULL,
    IsMobile               SMALLINT      NOT NULL,
    MobilePhone            SMALLINT      NOT NULL,
    MobilePhoneModel       TEXT          NOT NULL,
    Params                 TEXT          NOT NULL,
    IPNetworkID            INTEGER       NOT NULL,
    TraficSourceID         SMALLINT      NOT NULL,
    SearchEngineID         SMALLINT      NOT NULL,
    SearchPhrase           TEXT          NOT NULL,
    AdvEngineID            SMALLINT      NOT NULL,
    IsArtifical            SMALLINT      NOT NULL,
    WindowClientWidth      SMALLINT      NOT NULL,
    WindowClientHeight     SMALLINT      NOT NULL,
    ClientTimeZone         SMALLINT      NOT NULL,
    ClientEventTime        TIMESTAMP     NOT NULL,
    SilverlightVersion1    SMALLINT      NOT NULL,
    SilverlightVersion2    SMALLINT      NOT NULL,
    SilverlightVersion3    INTEGER       NOT NULL,
    SilverlightVersion4    SMALLINT      NOT NULL,
    PageCharset            TEXT          NOT NULL,
    CodeVersion            INTEGER       NOT NULL,
    IsLink                 SMALLINT      NOT NULL,
    IsDownload             SMALLINT      NOT NULL,
    IsNotBounce            SMALLINT      NOT NULL,
    FUniqID                BIGINT        NOT NULL,
    OriginalURL            TEXT          NOT NULL,
    HID                    INTEGER       NOT NULL,
    IsOldCounter           SMALLINT      NOT NULL,
    IsEvent                SMALLINT      NOT NULL,
    IsParameter            SMALLINT      NOT NULL,
    DontCountHits          SMALLINT      NOT NULL,
    WithHash               SMALLINT      NOT NULL,
    HitColor               CHAR          NOT NULL,
    LocalEventTime         TIMESTAMP     NOT NULL,
    Age                    SMALLINT      NOT NULL,
    Sex                    SMALLINT      NOT NULL,
    Income                 SMALLINT      NOT NULL,
    Interests              SMALLINT      NOT NULL,
    Robotness              SMALLINT      NOT NULL,
    RemoteIP               INTEGER       NOT NULL,
    WindowName             INTEGER       NOT NULL,
    OpenerName             INTEGER       NOT NULL,
    HistoryLength          SMALLINT      NOT NULL,
    BrowserLanguage        TEXT          NOT NULL,
    BrowserCountry         TEXT          NOT NULL,
    SocialNetwork          TEXT          NOT NULL,
    SocialAction           TEXT          NOT NULL,
    HTTPError              SMALLINT      NOT NULL,
    SendTiming             INTEGER       NOT NULL,
    DNSTiming              INTEGER       NOT NULL,
    ConnectTiming          INTEGER       NOT NULL,
    ResponseStartTiming    INTEGER       NOT NULL,
    ResponseEndTiming      INTEGER       NOT NULL,
    FetchTiming            INTEGER       NOT NULL,
    SocialSourceNetworkID  SMALLINT      NOT NULL,
    SocialSourcePage       TEXT          NOT NULL,
    ParamPrice             BIGINT        NOT NULL,
    ParamOrderID           TEXT          NOT NULL,
    ParamCurrency          TEXT          NOT NULL,
    ParamCurrencyID        SMALLINT      NOT NULL,
    OpenstatServiceName    TEXT          NOT NULL,
    OpenstatCampaignID     TEXT          NOT NULL,
    OpenstatAdID           TEXT          NOT NULL,
    OpenstatSourceID       TEXT          NOT NULL,
    UTMSource              TEXT          NOT NULL,
    UTMMedium              TEXT          NOT NULL,
    UTMCampaign            TEXT          NOT NULL,
    UTMContent             TEXT          NOT NULL,
    UTMTerm                TEXT          NOT NULL,
    FromTag                TEXT          NOT NULL,
    HasGCLID               SMALLINT      NOT NULL,
    RefererHash            BIGINT        NOT NULL,
    URLHash                BIGINT        NOT NULL,
    CLID                   INTEGER       NOT NULL
)
-- Storage: append-optimized, column-oriented, zstd-compressed.
WITH (appendoptimized = true, orientation = column, compresstype = zstd)
-- UserID is the distribution key.
DISTRIBUTED BY (UserID);
112+
-- Composite B-tree index on hits; the column order below is the key order.
CREATE INDEX hits_idx
    ON hits
    USING btree (CounterID, EventDate, UserID, EventTime, WatchID);
113+
-- External table with the same column definitions as hits. It reads
-- hits.tsv in TEXT format from the gpfdist endpoint at localhost:8080.
-- Any existing definition is dropped first so the script can be re-run.
DROP EXTERNAL TABLE IF EXISTS hits_ext;

CREATE EXTERNAL TABLE hits_ext (LIKE hits)
    LOCATION ('gpfdist://localhost:8080/hits.tsv')
    FORMAT 'TEXT';

greengage/data-size

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
# Print the size, in bytes, of the cluster's data directory.
#
# The cluster's data lives inside the container under /data0, not on the
# host. Run du inside the container so we see the actual bytes.
set -eu

NAME=clickbench-greengage

# du -bcs prints one line per path plus a grand "total" line; awk keeps the
# byte count from the "total" line. stderr stays discarded, as before, so
# partial du errors are still tolerated.
size=$(sudo docker exec "$NAME" du -bcs /data0 2>/dev/null | \
    awk '/total$/ { print $1 }')

# Without pipefail a failed docker exec would otherwise yield empty output
# and exit status 0. Report the failure instead of printing nothing.
if [ -z "$size" ]; then
    echo "data-size: could not determine the size of /data0 in container $NAME" >&2
    exit 1
fi

printf '%s\n' "$size"

greengage/gpinitsystem_config

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# FILE NAME: gpinitsystem_config
2+
3+
# Configuration file needed by the gpinitsystem
4+
5+
################################################
6+
#### REQUIRED PARAMETERS
7+
################################################
8+
9+
#### Naming convention for utility-generated data directories.
10+
SEG_PREFIX=gpseg
11+
12+
#### Base number by which primary segment port numbers
13+
#### are calculated.
14+
PORT_BASE=6000
15+
16+
#### File system location(s) where primary segment data directories
17+
#### will be created. The number of locations in the list dictates
18+
#### the number of primary segments that will get created per
19+
#### physical host (if multiple addresses for a host are listed in
20+
#### the hostfile, the number of segments will be spread evenly across
21+
#### the specified interface addresses).
22+
declare -a DATA_DIRECTORY=(/data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary /data0/primary)
23+
24+
#### Display name for the array. Stops gpinitsystem's [WARN] about the
25+
#### default and makes log output identify the cluster.
26+
ARRAY_NAME="ClickBench"
27+
28+
#### OS-configured hostname or IP address of the coordinator host.
29+
#### Greengage 6 / OpenGPDB 6 are Greenplum 6 forks and still use the
30+
#### MASTER_* variable names; gpinitsystem aborts with the fatal error
31+
#### "MASTER_HOSTNAME variable not set" if the COORDINATOR_* names from
32+
#### Greenplum 7+ are used.
33+
MASTER_HOSTNAME=localhost
34+
35+
#### File system location where the coordinator data directory
36+
#### will be created.
37+
MASTER_DIRECTORY=/data0/coordinator
38+
39+
#### Port number for the coordinator instance.
40+
MASTER_PORT=5432
41+
42+
#### Shell utility used to connect to remote hosts.
43+
TRUSTED_SHELL=ssh
44+
45+
#### Default server-side character set encoding.
46+
ENCODING=UNICODE
47+
48+
################################################
49+
#### OPTIONAL MIRROR PARAMETERS
50+
################################################
51+
52+
#### Base number by which mirror segment port numbers
53+
#### are calculated.
54+
#MIRROR_PORT_BASE=7000
55+
56+
#### File system location(s) where mirror segment data directories
57+
#### will be created. The number of mirror locations must equal the
58+
#### number of primary locations as specified in the
59+
#### DATA_DIRECTORY parameter.
60+
#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror)
61+
62+
63+
################################################
64+
#### OTHER OPTIONAL PARAMETERS
65+
################################################
66+
67+
#### Create a database of this name after initialization.
68+
#DATABASE_NAME=name_of_database
69+
70+
#### Specify the location of the host address file here instead of
71+
#### with the -h option of gpinitsystem.
72+
#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem

0 commit comments

Comments
 (0)