Skip to content

Commit 65fc071

Browse files
Merge pull request #903 from ClickHouse/clickbench-fixes
Fixes for various systems
2 parents 75e761c + 9f048fd commit 65fc071

167 files changed

Lines changed: 1431 additions & 1173 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

arc/install

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,41 @@ if [ ! -f "$DEB_FILE" ]; then
2626
fi
2727

2828
sudo dpkg -i "$DEB_FILE" || sudo apt-get install -f -y
29+
30+
# Arc 25.12+ links against GLIBC_2.38 (Debian/Ubuntu 24.04+ build
# environment); on a 22.04 host (glibc 2.35) the daemon dies with
# "libm.so.6: version `GLIBC_2.38' not found". Pull a newer libc6
# into a sysroot and wrap /usr/bin/arc so it boots via that loader,
# instead of bumping the whole base image to noble.
HAVE_GLIBC=$(ldd --version | head -1 | awk '{print $NF}')
if dpkg --compare-versions "$HAVE_GLIBC" lt 2.38; then
    NOBLE_DIR=/opt/glibc-noble
    # dpkg-deb -x preserves the package's internal paths under
    # $NOBLE_DIR, i.e. NOBLE_DIR/usr/lib64/ld-linux-x86-64.so.2
    # (NOT NOBLE_DIR/lib64/...). The "already extracted?" check and the
    # wrapper below both use this one path so they cannot drift apart.
    NOBLE_LOADER="$NOBLE_DIR/usr/lib64/ld-linux-x86-64.so.2"

    if [ ! -x "$NOBLE_LOADER" ]; then
        # Noble's libc6, pinned to an exact version and fetched from
        # Launchpad; curl --location (-L) follows the HTTPS redirects
        # the download goes through.
        LIBC_URL="https://launchpad.net/ubuntu/+archive/primary/+files/libc6_2.39-0ubuntu8.6_amd64.deb"
        sudo mkdir -p "$NOBLE_DIR"
        tmp=$(mktemp --suffix=.deb)
        # Stop here if the sysroot cannot be populated: continuing would
        # replace /usr/bin/arc with a wrapper that execs a missing loader.
        if ! curl -fsSL "$LIBC_URL" -o "$tmp" ||
           ! sudo dpkg-deb -x "$tmp" "$NOBLE_DIR"; then
            rm -f "$tmp"
            echo "Error: could not fetch/extract $LIBC_URL into $NOBLE_DIR" >&2
            exit 1
        fi
        rm -f "$tmp"
    fi

    # Wrap whenever /usr/bin/arc is the real ELF binary. Checking for an
    # existing arc.bin is not enough: the `dpkg -i` above re-installs the
    # real binary over the wrapper on every re-run, which would leave arc
    # unwrapped (and unable to start) while a stale arc.bin still exists.
    if [ "$(head -c 4 /usr/bin/arc 2>/dev/null)" = "$(printf '\177ELF')" ]; then
        sudo mv -f /usr/bin/arc /usr/bin/arc.bin
        # Unquoted heredoc on purpose: ${NOBLE_...} is expanded now, at
        # install time, while \\ and \$@ end up literal in the wrapper.
        sudo tee /usr/bin/arc >/dev/null <<EOF
#!/bin/bash
# Launch arc.bin via noble's ld-linux + libc, leaving the rest of the
# system unchanged. --library-path scopes the override to this
# invocation only.
exec ${NOBLE_LOADER} \\
    --library-path ${NOBLE_DIR}/usr/lib/x86_64-linux-gnu:${NOBLE_DIR}/usr/lib:/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu \\
    /usr/bin/arc.bin "\$@"
EOF
        sudo chmod +x /usr/bin/arc
    fi
    sudo systemctl daemon-reload
fi

arc/load

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,6 @@ TARGET_FILE="$TARGET_DIR/hits.parquet"
88

99
sudo mkdir -p "$TARGET_DIR"
1010

11-
if [ -f "$TARGET_FILE" ] && \
12-
[ "$(stat -c%s hits.parquet)" -eq "$(stat -c%s "$TARGET_FILE")" ]; then
13-
: # already loaded
14-
else
15-
sudo cp hits.parquet "$TARGET_FILE"
16-
fi
17-
18-
# Free up local space.
19-
rm -f hits.parquet
11+
# Symlink rather than copy — hits.parquet is 14 GB and we read it once.
12+
sudo ln -sfn "$PWD/hits.parquet" "$TARGET_FILE"
2013
sync

arc/start

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,38 @@ fi
1414
sudo systemctl start arc
1515

1616
# Wait for the HTTP endpoint to come up before we try to read the token.
17-
for _ in $(seq 1 30); do
17+
for _ in $(seq 1 60); do
1818
if curl -sf "$ARC_URL/health" >/dev/null 2>&1; then
1919
break
2020
fi
2121
sleep 1
2222
done
2323

2424
# On first start, Arc prints its admin token to its journal; capture it.
25+
# The log line has drifted between releases ("Initial admin API token:",
26+
# "Admin API token:", "API token:", ...) and journald can lag behind
27+
# /health, so we retry with a broader regex over ~60 s.
2528
if [ ! -f arc_token.txt ] || \
2629
! curl -sf "$ARC_URL/health" -H "x-api-key: $(cat arc_token.txt)" >/dev/null 2>&1; then
27-
TOKEN=$(sudo journalctl -u arc --no-pager \
28-
| grep -oP '(?:Initial admin API token|Admin API token): \K[^\s]+' \
29-
| head -1)
30+
TOKEN=""
31+
for _ in $(seq 1 60); do
32+
sudo journalctl --sync >/dev/null 2>&1 || true
33+
JOURNAL=$(sudo journalctl -u arc --no-pager 2>/dev/null || true)
34+
TOKEN=$(printf '%s\n' "$JOURNAL" \
35+
| grep -oP '(?:[Ii]nitial[[:space:]]+)?[Aa]dmin[[:space:]]+(?:API[[:space:]]+)?[Tt]oken[[:space:]]*[:=][[:space:]]*\K[^[:space:],]+' \
36+
| head -1)
37+
if [ -z "$TOKEN" ]; then
38+
TOKEN=$(printf '%s\n' "$JOURNAL" \
39+
| grep -oP '(?:API[[:space:]]+)?[Tt]oken[[:space:]]*[:=][[:space:]]*\K[A-Za-z0-9_.\-]{16,}' \
40+
| head -1)
41+
fi
42+
if [ -n "$TOKEN" ]; then break; fi
43+
sleep 1
44+
done
3045
if [ -z "$TOKEN" ]; then
3146
echo "Error: Could not extract Arc admin API token from journal" >&2
47+
echo "---journal tail---" >&2
48+
sudo journalctl -u arc --no-pager 2>&1 | tail -50 >&2
3249
exit 1
3350
fi
3451
echo "$TOKEN" > arc_token.txt

byconity/stop

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
#!/bin/bash
2+
# Use `stop`, not `down --volumes` — anonymous volumes hold the HDFS namenode
3+
# data and the bench database. The playground snapshots the VM after
4+
# ./stop and restores it on every query, so we need the data to outlive
5+
# the stop.
26

3-
docker compose down --volumes || true
7+
docker compose stop || true

cedardb-parquet/start

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,33 @@ if PGPASSWORD=test psql -h localhost -U postgres -c 'SELECT 1' >/dev/null 2>&1;
55
exit 0
66
fi
77

8+
# After a VM snapshot+restore, dockerd's in-memory networking/cgroup state
9+
# is out of sync with the (also-restored) kernel-side resources, and the
10+
# next `docker run` either fails or starts a container that can't be
11+
# reached on its mapped port. Restarting dockerd reconciles it.
12+
sudo systemctl restart docker
13+
for _ in $(seq 1 30); do
14+
sudo docker info >/dev/null 2>&1 && break
15+
sleep 1
16+
done
17+
818
sudo docker stop cedardb >/dev/null 2>&1 || true
919
sudo docker rm cedardb >/dev/null 2>&1 || true
1020

11-
sudo docker run -d --rm -p 5432:5432 \
12-
-v "$(pwd)/data:/data" \
13-
-v "$(pwd)/db:/var/lib/cedardb/data" \
14-
-e CEDAR_PASSWORD=test \
15-
--name cedardb cedardb/cedardb:latest >/dev/null
21+
if ! sudo docker run -d --rm -p 5432:5432 \
22+
-v "$(pwd)/data:/data" \
23+
-v "$(pwd)/db:/var/lib/cedardb/data" \
24+
-e CEDAR_PASSWORD=test \
25+
--name cedardb cedardb/cedardb:latest; then
26+
echo "docker run failed; ps -a:" >&2
27+
sudo docker ps -a >&2 || true
28+
exit 1
29+
fi
1630

17-
until pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1; do
31+
for _ in $(seq 1 60); do
32+
pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1 && exit 0
1833
sleep 1
1934
done
35+
echo "cedardb did not become ready in 60 s; container logs:" >&2
36+
sudo docker logs cedardb 2>&1 | tail -40 >&2 || true
37+
exit 1

cedardb/start

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,35 @@ if PGPASSWORD=test psql -h localhost -U postgres -c 'SELECT 1' >/dev/null 2>&1;
55
exit 0
66
fi
77

8+
# After a VM snapshot+restore, dockerd's in-memory networking/cgroup state
9+
# is out of sync with the (also-restored) kernel-side resources, and the
10+
# next `docker run` either fails or starts a container that can't be
11+
# reached on its mapped port. Restarting dockerd reconciles it. Harmless on
12+
# initial provision (the daemon was just started).
13+
sudo systemctl restart docker
14+
for _ in $(seq 1 30); do
15+
sudo docker info >/dev/null 2>&1 && break
16+
sleep 1
17+
done
18+
819
# `docker run --rm` cleans up container on exit; we run detached.
920
sudo docker stop cedardb >/dev/null 2>&1 || true
1021
sudo docker rm cedardb >/dev/null 2>&1 || true
1122

12-
sudo docker run -d --rm -p 5432:5432 \
13-
-v "$(pwd)/data:/data" \
14-
-v "$(pwd)/db:/var/lib/cedardb/data" \
15-
-e CEDAR_PASSWORD=test \
16-
--name cedardb cedardb/cedardb:latest >/dev/null
23+
if ! sudo docker run -d --rm -p 5432:5432 \
24+
-v "$(pwd)/data:/data" \
25+
-v "$(pwd)/db:/var/lib/cedardb/data" \
26+
-e CEDAR_PASSWORD=test \
27+
--name cedardb cedardb/cedardb:latest; then
28+
echo "docker run failed; ps -a:" >&2
29+
sudo docker ps -a >&2 || true
30+
exit 1
31+
fi
1732

18-
until pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1; do
33+
for _ in $(seq 1 60); do
34+
pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1 && exit 0
1935
sleep 1
2036
done
37+
echo "cedardb did not become ready in 60 s; container logs:" >&2
38+
sudo docker logs cedardb 2>&1 | tail -40 >&2 || true
39+
exit 1

chdb-dataframe/.preserve-state

Whitespace-only changes.

chdb-dataframe/query

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/bash
2-
# Reads a SQL query from stdin, dispatches to the running chdb server.
3-
# Stdout: server response JSON (small).
2+
# Reads a query from stdin, dispatches to the running in-VM server.
3+
# Stdout: result (rendered table or scalar from the server).
44
# Stderr: query runtime in fractional seconds on the last line.
55
# Exit non-zero on error.
66
set -e
@@ -19,5 +19,10 @@ if [ "$status" != "200" ]; then
1919
exit 1
2020
fi
2121

22-
echo "$body"
23-
echo "$body" | python3 -c 'import json,sys; print(json.load(sys.stdin)["elapsed"])' >&2
22+
# Pull `result` for stdout and `elapsed` for stderr (host timing protocol).
# The response is fed to Python on stdin, not as a command-line argument:
# Linux caps a single argv string at 128 KiB, and a Pretty-rendered result
# table can exceed that, which would abort with "Argument list too long".
# printf is a shell builtin, so no such limit applies to it.
printf '%s' "$body" | python3 -c '
import json, sys
d = json.load(sys.stdin)
print(d.get("result", ""))
sys.stderr.write(str(d["elapsed"]) + "\n")
'

chdb-dataframe/server.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,11 @@ async def query(request: Request):
5757
if not sql:
5858
raise HTTPException(status_code=400, detail="empty query")
5959
start = timeit.default_timer()
60-
conn.query(sql, "Null")
60+
# Pretty so the playground UI shows the actual result table, not
61+
# just the timing.
62+
res = conn.query(sql, "Pretty")
6163
elapsed = round(timeit.default_timer() - start, 3)
62-
return {"elapsed": elapsed}
64+
return {"elapsed": elapsed, "result": str(res)}
6365

6466

6567
@app.get("/data-size")

0 commit comments

Comments
 (0)