33
44NUM_CORES=$( nproc)
55
6- # Prefer the pre-decompressed hits.json shipped on the playground's
7- # readonly dataset disk — it's a 217 GB symlink target, doesn't burn
8- # the VM's 200 GB sparse sysdisk on a redundant gunzip. Fall back to
9- # wget + gunzip for standalone use.
10- if [ -f /opt/clickbench/datasets_ro/hits.json ]; then
11- ln -sf /opt/clickbench/datasets_ro/hits.json hits.json
12- elif [ -f /opt/clickbench/datasets_ro/hits.json.gz ]; then
13- ln -sf /opt/clickbench/datasets_ro/hits.json.gz hits.json.gz
14- FILE_SIZE=$( stat -L -c %s hits.json.gz)
15- pv -s " $FILE_SIZE " hits.json.gz | pigz -d > hits.json
16- else
17- wget --continue --progress=dot:giga \
18- ' https://datasets.clickhouse.com/hits_compatible/hits.json.gz'
19- FILE_SIZE=$( stat -L -c %s hits.json.gz)
20- pv -s " $FILE_SIZE " hits.json.gz | pigz -d > hits.json
21- fi
6+ # hits.json is delivered by the shared lib/download-hits-json helper
7+ # (symlink to the RO dataset on the playground, wget + pigz
8+ # elsewhere).
229
2310# Create the stream first — ingest below needs it to exist.
2411curl --silent --location --request PUT ' http://localhost:8000/api/v1/logstream/hits' \
@@ -45,8 +32,8 @@ pv hits.json | parallel --pipe -N$LINES_PER_CHUNK --block 10M \
4532 --data-binary @- >/dev/null
4633 '
4734
48- # Drop the symlink to the RO dataset — no chunk files to clean up.
49- rm -f hits.json hits.json.gz
35+ # Drop the symlink/file delivered by lib/download-hits-json.
36+ rm -f hits.json
5037
5138# Allow sync to complete.
5239sleep 180
0 commit comments