Skip to content

Commit a023ad9

Browse files
committed
wb | new genesis "chunks" backend separating dataset from protocol
1 parent 734d6b7 commit a023ad9

21 files changed

Lines changed: 1752 additions & 3 deletions

nix/workbench/backend/runner.nix

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ let
4646
export WB_BACKEND=${backendName}
4747
export WB_BACKEND_DATA=${backendDataDir}
4848
export WB_CREATE_TESTNET_DATA=''${WB_CREATE_TESTNET_DATA:-1}
49-
export WB_DEPLOYMENT_NAME=''${WB_DEPLOYMENT_NAME:-$(basename $(pwd))}
49+
export WB_GENESIS_CHUNKS=''${WB_GENESIS_CHUNKS:-0}
5050
export WB_MODULAR_GENESIS=''${WB_MODULAR_GENESIS:-0}
51+
export WB_DEPLOYMENT_NAME=''${WB_DEPLOYMENT_NAME:-$(basename $(pwd))}
5152
export WB_LOCLI_DB=''${WB_LOCLI_DB:-1}
5253
if test -z "$(git status --porcelain --untracked-files=no)"
5354
then export WB_GITREV="$(git rev-parse HEAD)"
Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
# shellcheck shell=bash
2+
#
3+
# Dataset-cached genesis backend.
4+
# Used when WB_GENESIS_CHUNKS=1.
5+
#
6+
# Implements the backend interface:
7+
# profile-cache-key-input-chunks, profile-cache-key-chunks,
8+
# spec-chunks, pool-relays-chunks,
9+
# genesis-create-chunks,
10+
# derive-from-cache-chunks
11+
#
12+
# Three-level cache:
13+
#
14+
# Level 1: dataset cache ($cache_dir/genesis/dataset/)
15+
# Keyed by dataset params (pools, delegators, supply, etc.) + hash.
16+
# Shared across profiles with the same dataset. Expensive to create.
17+
# Stores:
18+
# - byron.dataset.json {bootStakeholders, heavyDelegation, nonAvvmBalances}
19+
# - shelley.dataset.json {initialFunds, staking, maxLovelaceSupply} (can be 500MB+)
20+
# - conway.dataset.json {initialDReps} (can be 200MB+)
21+
# - byron-gen-command/ drep-keys/ pools-keys/ stake-delegators/ utxo-keys/
22+
#
23+
# Level 2: protocol cache ($cache_dir/genesis/protocol/)
24+
# Keyed by hash of genesis.shelley + genesis.alonzo + genesis.conway +
25+
# genesis.dijkstra from the profile. Cheap to create.
26+
# Stores the spec-derived parts WITHOUT dataset fields and WITHOUT
27+
# systemStart (which is per-run):
28+
# - byron.protocol.json (spec minus dataset fields)
29+
# - shelley.protocol.json (spec minus dataset fields and systemStart)
30+
# - alonzo.protocol.json (complete spec)
31+
# - conway.protocol.json (spec minus dataset fields)
32+
# - dijkstra.protocol.json (complete spec)
33+
#
34+
# Level 3: genesis cache ($cache_dir/genesis/)
35+
# Keyed by: profile name + protocol hash + dataset hash.
36+
# Assembled from level 1 + level 2 + systemStart using cat/sed/printf.
37+
# No `jq` on any large file. No pretty-printing.
38+
39+
# -- Backend interface ---------------------------------------------------------
40+
41+
# The genesis cache key combines profile name, protocol params hash, and
42+
# dataset hash.
43+
profile-cache-key-input-chunks() {
  # Print "<protocol digest>-<dataset digest>" for the profile JSON file $1.
  # Each digest is the first 7 hex characters of a SHA-1:
  #   - protocol: over the compact, key-sorted genesis sections of the profile;
  #   - dataset:  over the output of dataset-cache-key.
  local profile_json=$1
  local protocol_digest dataset_digest

  protocol_digest=$(
    jq -c -S '{
      byron: ( .genesis.byron // {} ),
      shelley: ( .genesis.shelley ),
      alonzo: ( .genesis.alonzo ),
      conway: ( .genesis.conway // {} ),
      dijkstra: ( .genesis.dijkstra // {} )
    }' "$profile_json" \
      | sha1sum \
      | cut -c-7
  )

  dataset_digest=$(
    dataset-cache-key "$profile_json" \
      | sha1sum \
      | cut -c-7
  )

  printf '%s\n' "${protocol_digest}-${dataset_digest}"
}
56+
57+
profile-cache-key-chunks() {
  # Print the genesis cache key for the profile JSON file $1:
  # the profile's .name, a dash, then the output of
  # profile-cache-key-input-chunks for the same profile.
  local profile_json=$1
  local profile_name key_input

  profile_name=$(jq -r '.name' "$profile_json")
  key_input=$(profile-cache-key-input-chunks "$profile_json")

  printf '%s\n' "${profile_name}-${key_input}"
}
63+
64+
spec-chunks() {
65+
spec-jq "$@";
66+
}
67+
68+
pool-relays-chunks() {
69+
pool-relays-jq "$@";
70+
}
71+
72+
# Ensure sub-caches exist, assemble the genesis cache entry with symlinks
73+
# to the dataset cache for key directories, and pre-assemble the genesis
74+
# JSON files that don't change per run. genesis.byron.json and
75+
# genesis.shelley.json (per-run startTime / systemStart) are assembled later by derive-from-cache-chunks.
76+
genesis-create-chunks() {
  # Create the genesis cache entry for a profile.
  #
  # Arguments:
  #   $1 - path to the profile JSON file
  #   $2 - genesis cache entry directory to populate (created if missing)
  #
  # Effects on $2:
  #   - dataset/ and protocol/ symlinks to the sub-cache entries returned by
  #     dataset-cache-ensure / protocol-cache-ensure;
  #   - genesis.alonzo.json, genesis.conway.json and genesis.dijkstra.json.
  # Returns non-zero as soon as any step fails.
  local profile_json=$1
  local outdir=$2

  mkdir -p "$outdir" || return 1

  # All four are local: the names declared here must match the names assigned
  # below, otherwise the assignments create globals in the sourcing shell.
  local dataset_key dataset_entry protocol_key protocol_entry
  dataset_key=$(dataset-cache-key "$profile_json") || return 1
  protocol_key=$(protocol-cache-key "$profile_json") || return 1

  # The sub-caches are addressed as siblings of the genesis cache entry:
  # $outdir/../dataset/KEY and $outdir/../protocol/KEY.
  dataset_entry=$(dataset-cache-ensure "$profile_json" "$outdir/../dataset/$dataset_key") \
    || return 1
  protocol_entry=$(protocol-cache-ensure "$profile_json" "$outdir/../protocol/$protocol_key") \
    || return 1

  # -n: if the link already exists (re-run on the same entry) replace it.
  # Without -n, ln follows the existing link and creates a nested link
  # inside the shared sub-cache directory instead.
  ln -sfn "$dataset_entry" "$outdir/dataset" || return 1
  ln -sfn "$protocol_entry" "$outdir/protocol" || return 1

  # -- genesis JSON files that don't change per run: assemble once -------------

  # Byron and Shelley change per run (startTime/systemStart) so they are
  # assembled in derive-from-cache-chunks instead.

  # Alonzo: protocol parameters only, no dataset fields.
  cat "$protocol_entry/alonzo.protocol.json" > "$outdir/genesis.alonzo.json" \
    || return 1

  # Conway: protocol parameters + dataset.
  # Textual merge of two JSON objects: drop the closing brace of the protocol
  # object, emit a comma, then the dataset object without its opening brace.
  {
    sed 's/}$//' "$protocol_entry/conway.protocol.json" &&
      printf ',' &&
      tail -c +2 "$dataset_entry/conway.dataset.json"
  } > "$outdir/genesis.conway.json" || return 1

  # Dijkstra: protocol parameters only, no dataset fields.
  cat "$protocol_entry/dijkstra.protocol.json" > "$outdir/genesis.dijkstra.json" \
    || return 1

  info genesis "genesis cache entry created in $outdir"
}
112+
113+
# Populate the run directory from the genesis cache entry.
114+
# Everything is symlinked except genesis.byron.json and genesis.shelley.json,
115+
# which are assembled fresh per run (startTime / systemStart) with plain cat/sed/printf.
116+
derive-from-cache-chunks() {
  # Populate a run's genesis directory from a genesis cache entry.
  #
  # Arguments (all required; a missing one aborts with "$usage"):
  #   $1 - path to the profile JSON file
  #   $2 - timing JSON text; .start and .systemStart are read from it
  #   $3 - genesis cache entry directory
  #   $4 - output directory (run dir's genesis/, e.g. run/current/genesis)
  #
  # If `profile preset` prints a non-empty name, the cache entry's
  # genesis*.json files are copied and nothing else is done.
  # Otherwise the output directory is filled with symlinks into the cache
  # entry, plus genesis.byron.json and genesis.shelley.json assembled with
  # the per-run start time.
  # Returns 1 (before touching the output directory beyond creating it) when
  # the timing lacks .start or .systemStart.
  local profile_json=${1:?$usage}
  local timing=${2:?$usage}
  local cache_entry=${3:?$usage}
  local outdir=${4:?$usage}

  mkdir -p "$outdir"

  local preset
  preset=$(profile preset "$profile_json")
  if [[ -n "$preset" ]]; then
    progress "genesis" "instantiating from preset $(with_color white "$preset"): $cache_entry"
    cp -f "$cache_entry"/genesis*.json "$outdir"
    return
  fi

  # Read and validate the per-run timing first: `jq -r` prints the literal
  # string "null" for a missing field, which would otherwise be written
  # verbatim into the assembled genesis files.
  local system_start_epoch system_start
  system_start_epoch=$(jq -r '.start' <<<"$timing") || return 1
  system_start=$(jq -r '.systemStart' <<<"$timing") || return 1
  if [[ -z "$system_start_epoch" || "$system_start_epoch" == null ||
        -z "$system_start" || "$system_start" == null ]]; then
    printf 'derive-from-cache-chunks: timing lacks .start and/or .systemStart\n' >&2
    return 1
  fi

  progress "genesis" "deriving from cache: $cache_entry -> $outdir"

  ln -s "$profile_json" "$outdir"/profile.json
  ln -s "$cache_entry" "$outdir"/cache-entry
  ln -s "$cache_entry"/cache.key "$outdir"
  ln -s "$cache_entry"/cache.key.input "$outdir"
  ln -s "$cache_entry"/layout.version "$outdir"

  # Key directories as symlinks from cache (create-testnet-data output names).
  local keydir
  for keydir in byron-gen-command drep-keys pools-keys stake-delegators utxo-keys; do
    ln -s "$cache_entry/dataset/$keydir" "$outdir/$keydir"
  done

  # Genesis JSON files that don't change per run as symlinks from cache.
  ln -s "$cache_entry"/genesis.alonzo.json "$outdir"/genesis.alonzo.json
  ln -s "$cache_entry"/genesis.conway.json "$outdir"/genesis.conway.json
  ln -s "$cache_entry"/genesis.dijkstra.json "$outdir"/genesis.dijkstra.json

  # Byron and Shelley change per run (startTime / systemStart).
  # Textual merge: protocol object without its closing brace, the per-run
  # field, a comma, then the dataset object without its opening brace.
  # Only the protocol file goes through sed; the dataset file is streamed.

  # Byron (always small)
  {
    sed 's/}$//' "$cache_entry/protocol/byron.protocol.json" &&
      printf ',"startTime":%s' "$system_start_epoch" &&
      printf ',' &&
      tail -c +2 "$cache_entry/dataset/byron.dataset.json"
  } > "$outdir/genesis.byron.json" || return 1

  # Shelley (can be 500MB+)
  {
    sed 's/}$//' "$cache_entry/protocol/shelley.protocol.json" &&
      printf ',"systemStart":"%s"' "$system_start" &&
      printf ',' &&
      tail -c +2 "$cache_entry/dataset/shelley.dataset.json"
  } > "$outdir/genesis.shelley.json" || return 1
}
173+
174+
# ==============================================================================
175+
# Dataset cache (level 1)
176+
# ==============================================================================
177+
178+
# Build the dataset cache key from a profile JSON.
179+
dataset-cache-key() {
  # Print the dataset cache key for the profile JSON file $1:
  # a human-readable string built from the profile's dataset parameters,
  # followed by a dash and the first 7 hex characters of that string's SHA-1.
  local profile_json=$1
  local readable digest

  readable=$(
    jq -r '
      "pools-" + (.composition.n_pools | tostring)
      + "-stake-delegators-" + (.derived.delegators_effective | tostring)
      + "-drep-keys-" + (.genesis.dreps | tostring)
      + "-stuffed-utxo-" + (.derived.utxo_stuffed | tostring)
      + "-utxo-keys-" + (.genesis.utxo_keys | tostring)
      + "-total-supply-" + (.derived.supply_total | tostring)
      + "-delegated-supply-" + (.derived.supply_delegated | tostring)
      + "-testnet-magic-" + (.genesis.network_magic | tostring)
    ' "$profile_json"
  )

  # The digest covers the readable part plus one trailing newline.
  digest=$(printf '%s\n' "$readable" | sha1sum | cut -c-7)

  printf '%s\n' "${readable}-${digest}"
}
199+
200+
# Ensure a dataset cache entry exists. If not, create it.
201+
# Returns the cache entry path on stdout.
202+
dataset-cache-ensure() {
  # Ensure the dataset cache entry $2 exists for the profile JSON file $1,
  # creating it when missing.
  #
  # Output:  the cache entry path ($2) on stdout - and nothing else, because
  #          callers capture stdout.
  # Returns: 0 on a cache hit or successful creation; 1 if creation failed,
  #          in which case no entry (and no temporary directory) is left behind.
  # Globals: global_basedir (read) - root under which the zeroed genesis specs
  #          live (profile/presets/zero/genesis).
  local profile_json=$1
  local outdir=$2
  local zero_dir="$global_basedir/profile/presets/zero/genesis"

  if test -d "$outdir"; then
    info genesis "dataset cache hit: $outdir"
    echo "$outdir"
    return 0
  fi

  info genesis "dataset cache miss: $outdir"

  # Build in a temporary directory next to the final location, so that the
  # final `mv` stays within one directory instead of possibly copying the
  # whole dataset across filesystems from the system temp directory.
  local parent tmpdir
  parent=$(dirname "$outdir")
  mkdir -p "$parent" || return 1
  tmpdir=$(mktemp -d "$parent/.tmp-dataset.XXXXXX") || return 1
  # Safety net for an abnormal shell exit while building.
  trap 'rm -rf "$tmpdir"' EXIT

  progress genesis "creating dataset cache: $outdir"

  # Every step is checked explicitly: this function is meant to be called
  # inside $(...), where bash (outside POSIX mode / inherit_errexit) clears
  # errexit, so a failed step would otherwise still reach the `mv` below.
  if ! _dataset-cache-build "$profile_json" "$zero_dir" "$tmpdir"; then
    rm -rf -- "${tmpdir:?}"
    trap - EXIT
    printf 'dataset-cache-ensure: failed to create dataset cache entry: %s\n' "$outdir" >&2
    return 1
  fi

  # TODO: remove the raw genesis files produced by create-testnet-data ???
  if test -e "$outdir"; then
    # The entry appeared while we were building (concurrent run): keep it.
    # Moving onto an existing directory would nest ours inside it.
    rm -rf -- "${tmpdir:?}"
  elif ! mv "$tmpdir" "$outdir"; then
    rm -rf -- "${tmpdir:?}"
    trap - EXIT
    return 1
  fi
  trap - EXIT
  info genesis "dataset cached: $outdir"

  echo "$outdir"
}

# Build the contents of a dataset cache entry in directory $3:
# run `create-testnet-data` with the zeroed specs found in $2, using the
# dataset parameters of profile JSON file $1, then extract the
# *.dataset.json files. Returns non-zero at the first failing step.
_dataset-cache-build() {
  local profile_json=$1
  local zero_dir=$2
  local builddir=$3

  # Extract dataset parameters from canonical profile fields.
  local era pools delegators dreps stuffed utxo_keys total_supply delegated_supply magic
  era=$(jq --raw-output '.era' "$profile_json") || return 1
  pools=$(jq --raw-output '.composition.n_pools' "$profile_json") || return 1
  delegators=$(jq --raw-output '.derived.delegators_effective' "$profile_json") || return 1
  dreps=$(jq --raw-output '.genesis.dreps' "$profile_json") || return 1
  stuffed=$(jq --raw-output '.derived.utxo_stuffed' "$profile_json") || return 1
  utxo_keys=$(jq --raw-output '.genesis.utxo_keys' "$profile_json") || return 1
  total_supply=$(jq --raw-output '.derived.supply_total' "$profile_json") || return 1
  delegated_supply=$(jq --raw-output '.derived.supply_delegated' "$profile_json") || return 1
  magic=$(jq --raw-output '.genesis.network_magic' "$profile_json") || return 1

  # Run `create-testnet-data` with zeroed specs (see profile/presets/zero).
  # Its stdout is sent to stderr so it cannot end up in the path that
  # dataset-cache-ensure reports on stdout.
  cardano-cli "$era" genesis create-testnet-data \
    --spec-shelley "$zero_dir/shelley-genesis.json" \
    --spec-alonzo "$zero_dir/alonzo-genesis.json" \
    --spec-conway "$zero_dir/conway-genesis.json" \
    --out-dir "$builddir" \
    --start-time "1970-01-01T00:00:00Z" \
    --testnet-magic "$magic" \
    --total-supply "$total_supply" \
    --delegated-supply "$delegated_supply" \
    --utxo-keys "$utxo_keys" \
    --pools "$pools" \
    --stake-delegators "$delegators" \
    --drep-keys "$dreps" \
    --stuffed-utxo "$stuffed" \
    >&2 || return 1

  # Extract dataset fields as compact JSON (one jq pass per file).
  jq -c '{bootStakeholders, heavyDelegation, nonAvvmBalances}' "$builddir/byron-genesis.json" \
    > "$builddir/byron.dataset.json" || return 1
  jq -c '{initialFunds, staking, maxLovelaceSupply}' "$builddir/shelley-genesis.json" \
    > "$builddir/shelley.dataset.json" || return 1
  jq -c '{initialDReps}' "$builddir/conway-genesis.json" \
    > "$builddir/conway.dataset.json" || return 1

  # Ensure all key directories exist (even if `create-testnet-data` didn't create them).
  local keydir
  for keydir in byron-gen-command drep-keys pools-keys stake-delegators utxo-keys; do
    mkdir -p "$builddir/$keydir" || return 1
  done
}
265+
266+
# ==============================================================================
267+
# Protocol cache (level 2)
268+
# ==============================================================================
269+
270+
# Protocol cache key: slotLength, epochLength, activeSlotsCoeff (human-readable)
271+
# followed by a hash of all protocol parameter fields.
272+
protocol-cache-key() {
  # Print the protocol cache key for the profile JSON file $1:
  #   slot-<slotLength>-epoch-<epochLength>-active-<activeSlotsCoeff>-<digest>
  # where the three values come from .genesis.shelley and <digest> is the
  # first 7 hex characters of the SHA-1 of the compact, key-sorted genesis
  # sections of the profile.
  local profile_json=$1
  local slot_length epoch_length active_coeff params_digest

  slot_length=$(jq -r '.genesis.shelley.slotLength' "$profile_json")
  epoch_length=$(jq -r '.genesis.shelley.epochLength' "$profile_json")
  active_coeff=$(jq -r '.genesis.shelley.activeSlotsCoeff' "$profile_json")

  params_digest=$(
    jq -c -S '{
      byron: ( .genesis.byron // {} ),
      shelley: ( .genesis.shelley ),
      alonzo: ( .genesis.alonzo ),
      conway: ( .genesis.conway // {} ),
      dijkstra: ( .genesis.dijkstra // {} )
    }' "$profile_json" \
      | sha1sum \
      | cut -c-7
  )

  printf '%s\n' "slot-${slot_length}-epoch-${epoch_length}-active-${active_coeff}-${params_digest}"
}
287+
288+
# Ensure a protocol cache entry exists. If not, create it.
289+
# Returns the cache entry path on stdout.
290+
protocol-cache-ensure() {
  # Ensure the protocol cache entry $2 exists for the profile JSON file $1,
  # creating it when missing.
  #
  # Output:  the cache entry path ($2) on stdout.
  # Returns: 0 on a cache hit or successful creation; 1 if any spec could not
  #          be generated or filtered, in which case no entry is left behind.
  #
  # A created entry holds <era>.protocol.json for byron, shelley, alonzo,
  # conway and dijkstra: the output of `genesis spec <era>` with the dataset
  # fields and the per-run start time removed.
  local profile_json=$1
  local outdir=$2

  if test -d "${outdir}"; then
    info genesis "protocol parameters cache hit: ${outdir}"
    echo "${outdir}"
    return 0
  fi

  info genesis "protocol parameters cache miss: ${outdir}"

  # Build in a scratch directory next to the final location and rename it
  # only once all five files exist. Writing straight into ${outdir} would
  # leave a partial entry behind on failure, and the `test -d` above would
  # then report it as a cache hit on the next run.
  local parent tmpdir
  parent=$(dirname "${outdir}")
  mkdir -p "${parent}" || return 1
  tmpdir="${parent}/.tmp-protocol.$$.${RANDOM}"
  # Plain mkdir: fails if the name is taken, and keeps umask-derived
  # permissions like the directory it replaces.
  mkdir "${tmpdir}" || return 1

  # Generate specs from profile and strip dataset fields + systemStart.
  # All specs are small (<10KB), using `jq` is fine here.
  # MUST be compact (-c) because the merge step uses sed 's/}$//' which
  # only works on single-line JSON.
  local era filter spec
  for era in byron shelley alonzo conway dijkstra; do
    case "${era}" in
      byron)   filter='del(.bootStakeholders, .heavyDelegation, .nonAvvmBalances, .startTime)' ;;
      shelley) filter='del(.initialFunds, .staking, .maxLovelaceSupply, .systemStart)' ;;
      conway)  filter='del(.initialDReps)' ;;
      *)       filter='.' ;; # alonzo, dijkstra: complete spec
    esac
    # The generator runs on its own (not in a pipeline) so that its exit
    # status is seen even without pipefail.
    if ! spec=$(genesis spec "${era}" "${profile_json}") ||
       ! jq -c "${filter}" <<<"${spec}" > "${tmpdir}/${era}.protocol.json"
    then
      rm -rf -- "${tmpdir:?}"
      printf 'protocol-cache-ensure: failed to generate %s protocol parameters for: %s\n' \
        "${era}" "${outdir}" >&2
      return 1
    fi
  done

  if test -e "${outdir}"; then
    # The entry appeared while we were building (concurrent run): keep it.
    rm -rf -- "${tmpdir:?}"
  elif ! mv "${tmpdir}" "${outdir}"; then
    rm -rf -- "${tmpdir:?}"
    return 1
  fi
  info genesis "protocol parameters cached: ${outdir}"

  echo "${outdir}"
}
324+

nix/workbench/genesis/genesis.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ global_genesis_format_version=October-13-2025
66
# Resolve genesis backend once (at source time, no output).
77
# Each backend file defines: spec-*, pool-relays-*, profile-cache-key-*,
88
# profile-cache-key-input-*, genesis-create-*, derive-from-cache-*
9-
if [[ ${WB_MODULAR_GENESIS:-0} -eq 1 ]]; then genesis_backend=modular
10-
else genesis_backend=jq
9+
if [[ ${WB_GENESIS_CHUNKS:-0} -eq 1 ]]; then genesis_backend=chunks
10+
elif [[ ${WB_MODULAR_GENESIS:-0} -eq 1 ]]; then genesis_backend=modular
11+
else genesis_backend=jq
1112
fi
1213

1314
usage_genesis() {

0 commit comments

Comments
 (0)