Skip to content

Commit 9f94dfb

Browse files
committed
Add chunked runner script for crash-resilient long runs
1 parent 95cbc82 commit 9f94dfb

1 file changed

Lines changed: 128 additions & 0 deletions

File tree

run.sh

Lines changed: 128 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,128 @@
1+
#!/bin/bash
2+
set -euo pipefail
3+
4+
# FCR Simulator chunked runner
5+
# Runs the simulator in chunks so crashes don't lose all progress.
6+
# Completed chunk CSVs persist on disk regardless of later failures.
7+
8+
# Print the command-line synopsis to stderr and exit with status 1.
#
# Options shown in [brackets] are optional: the script assigns a default to
# each of them before parsing. The remaining three options are required.
# Never returns.
usage() {
    {
        printf 'Usage: %s --start-epoch START --end-epoch END --beacon-node-url URL\n' "$0"
        printf '       [--parallel WORKERS] [--chunk-size EPOCHS] [--cache-dir DIR]\n'
        printf '       [--output-dir DIR] [--binary PATH]\n'
    } >&2
    exit 1
}
12+
13+
# Defaults; each one can be overridden by the matching command-line option.
CACHE_DIR="$HOME/.cache/fcr-simulator"
OUTPUT_DIR="./results"
CHUNK_SIZE=1000
PARALLEL=2
BINARY="./lighthouse/target/release/fcr-simulator"

# Parse the command line into the global configuration variables
# (START_EPOCH, END_EPOCH, BEACON_NODE_URL, PARALLEL, CHUNK_SIZE, CACHE_DIR,
# OUTPUT_DIR, BINARY) and validate the values this script does arithmetic on.
#
# Arguments: the script's positional parameters ("$@").
# On any problem a one-line reason is written to stderr and usage() is
# called, which terminates the script:
#   - an unknown option,
#   - an option given as the last word, with no value after it (checked
#     explicitly so `set -u` does not abort with "unbound variable"),
#   - a missing --start-epoch, --end-epoch or --beacon-node-url,
#   - epochs that are not plain decimal integers (leading zeros are rejected
#     because shell arithmetic would read them as octal),
#   - a chunk size that is not a positive integer (zero would divide by zero
#     when the chunk count is computed).
parse_args() {
    local uint_re='^(0|[1-9][0-9]*)$'
    local positive_re='^[1-9][0-9]*$'

    while [[ $# -gt 0 ]]; do
        # First pass: is the option known, and is a value present?
        case "$1" in
            --start-epoch | --end-epoch | --parallel | --chunk-size | \
                --beacon-node-url | --cache-dir | --output-dir | --binary)
                if [[ $# -lt 2 ]]; then
                    echo "Missing value for option: $1" >&2
                    usage
                fi
                ;;
            *)
                echo "Unknown option: $1" >&2
                usage
                ;;
        esac

        # Second pass: store the value.
        case "$1" in
            --start-epoch)     START_EPOCH="$2" ;;
            --end-epoch)       END_EPOCH="$2" ;;
            --parallel)        PARALLEL="$2" ;;
            --chunk-size)      CHUNK_SIZE="$2" ;;
            --beacon-node-url) BEACON_NODE_URL="$2" ;;
            --cache-dir)       CACHE_DIR="$2" ;;
            --output-dir)      OUTPUT_DIR="$2" ;;
            --binary)          BINARY="$2" ;;
        esac
        shift 2
    done

    if [[ -z "${START_EPOCH:-}" || -z "${END_EPOCH:-}" || -z "${BEACON_NODE_URL:-}" ]]; then
        usage
    elif ! [[ "$START_EPOCH" =~ $uint_re && "$END_EPOCH" =~ $uint_re ]]; then
        echo "--start-epoch and --end-epoch must be non-negative decimal integers" >&2
        usage
    elif ! [[ "$CHUNK_SIZE" =~ $positive_re ]]; then
        echo "--chunk-size must be a positive decimal integer" >&2
        usage
    fi
}

parse_args "$@"
37+
38+
# Make sure the results directory is present before any chunk is written.
mkdir -p "$OUTPUT_DIR"

# Size of the requested epoch range, and the number of chunks needed to cover
# it (ceiling division, so a trailing partial chunk is counted as one chunk).
TOTAL_EPOCHS=$((END_EPOCH - START_EPOCH))
TOTAL_CHUNKS=$(( (TOTAL_EPOCHS + CHUNK_SIZE - 1) / CHUNK_SIZE ))

# Banner describing the run, followed by one blank line.
printf '%s\n' \
    "=== FCR Simulator Chunked Run ===" \
    "Range: epoch $START_EPOCH - $END_EPOCH ($TOTAL_EPOCHS epochs)" \
    "Chunk size: $CHUNK_SIZE epochs" \
    "Total chunks: $TOTAL_CHUNKS" \
    "Workers per chunk: $PARALLEL" \
    "Output dir: $OUTPUT_DIR" \
    ""
50+
51+
# Running totals, read again by the summary printed after the loop.
COMPLETED=0
FAILED=0

# Walk the half-open epoch range [START_EPOCH, END_EPOCH) in steps of
# CHUNK_SIZE and run the simulator binary once per chunk.
#
# Globals read:    START_EPOCH END_EPOCH CHUNK_SIZE TOTAL_CHUNKS OUTPUT_DIR
#                  CACHE_DIR PARALLEL BINARY BEACON_NODE_URL
# Globals written: COMPLETED FAILED
# Output:          one progress line per chunk on stdout.
#
# A chunk whose CSV already exists and holds more than 90% of the expected
# line count is treated as finished and is not re-run; this is what makes a
# restarted run resume where the previous one stopped. A failing chunk is
# counted and reported, and the loop moves on to the next chunk.
run_chunks() {
    # Slots per epoch used to estimate how many rows a finished chunk holds.
    local -r slots_per_epoch=32
    local cursor=$START_EPOCH
    local chunk_end chunk_num chunk_file lines expected_slots rc

    # A non-positive step would never move the cursor forward.
    if (( cursor < END_EPOCH && CHUNK_SIZE <= 0 )); then
        echo "Chunk size must be a positive integer (got: $CHUNK_SIZE)" >&2
        exit 1
    fi

    while (( cursor < END_EPOCH )); do
        # The last chunk is clipped to the end of the requested range.
        chunk_end=$(( cursor + CHUNK_SIZE ))
        if (( chunk_end > END_EPOCH )); then
            chunk_end=$END_EPOCH
        fi

        chunk_num=$(( COMPLETED + FAILED + 1 ))
        chunk_file="$OUTPUT_DIR/chunk-${cursor}-${chunk_end}.csv"

        # Skip if this chunk already completed.
        if [[ -f "$chunk_file" ]]; then
            lines=$(( $(wc -l < "$chunk_file") ))
            expected_slots=$(( (chunk_end - cursor) * slots_per_epoch ))
            # Header + at least 90% of expected slots = good enough.
            if (( lines > expected_slots * 9 / 10 )); then
                echo "[$chunk_num/$TOTAL_CHUNKS] Chunk $cursor-$chunk_end already complete ($lines lines), skipping"
                COMPLETED=$(( COMPLETED + 1 ))
                cursor=$chunk_end
                continue
            fi
        fi

        echo "[$chunk_num/$TOTAL_CHUNKS] Running chunk: epoch $cursor - $chunk_end"

        if "$BINARY" \
            --beacon-node-url "$BEACON_NODE_URL" \
            --start-epoch "$cursor" \
            --end-epoch "$chunk_end" \
            --output "$chunk_file" \
            --cache-dir "$CACHE_DIR" \
            --parallel "$PARALLEL"; then
            COMPLETED=$(( COMPLETED + 1 ))
            echo "[$chunk_num/$TOTAL_CHUNKS] Chunk complete"
        else
            # Save the status first: any later command, even an assignment,
            # resets $? and the message would always report 0.
            rc=$?
            FAILED=$(( FAILED + 1 ))
            echo "[$chunk_num/$TOTAL_CHUNKS] Chunk FAILED (exit code $rc)"
            echo "  Worker CSVs may be at: $OUTPUT_DIR/chunk-${cursor}-${chunk_end}.worker-*.csv"
        fi

        cursor=$chunk_end
    done
}

run_chunks
96+
97+
echo ""
echo "=== Summary ==="
echo "Completed: $COMPLETED / $TOTAL_CHUNKS chunks"
echo "Failed: $FAILED"

# Merge the chunk CSVs found in the output directory.
MERGED="$OUTPUT_DIR/merged.csv"
echo ""
echo "Merging completed chunks into $MERGED"

# Number of chunk files that went into the most recent merge.
MERGED_CHUNK_COUNT=0

# merge_chunks DIR DEST
#
# Concatenate every DIR/chunk-<start>-<end>.csv into DEST: the header line of
# the first file, then the data rows (line 2 onward) of each file.
#
#   - Files are ordered numerically by <start>. Plain glob order is
#     lexicographic and would place chunk-1000-... before chunk-900-....
#   - Only the base name is matched, so per-worker files
#     (chunk-A-B.worker-N.csv) are excluded without also excluding everything
#     when DIR itself happens to contain the word "worker".
#   - Every matching file in DIR is merged, including ones left by earlier
#     runs over a different range.
#   - DEST is assembled in a temporary sibling file and renamed at the end, so
#     an interrupted merge does not leave a truncated DEST.
#
# Sets MERGED_CHUNK_COUNT. When no chunk file exists DEST is left untouched.
# Paths containing newline characters are not supported.
merge_chunks() {
    local dir=$1 dest=$2
    local name_re='^chunk-([0-9]+)-[0-9]+\.csv$'
    local f base tmp
    local -a ordered=()

    while IFS=$'\t' read -r _ f; do
        ordered+=("$f")
    done < <(
        for f in "$dir"/chunk-*.csv; do
            [[ -f "$f" ]] || continue
            base=${f##*/}
            if [[ "$base" =~ $name_re ]]; then
                printf '%s\t%s\n' "${BASH_REMATCH[1]}" "$f"
            fi
        done | LC_ALL=C sort -t "$(printf '\t')" -k1,1n
    )

    MERGED_CHUNK_COUNT=${#ordered[@]}
    if (( MERGED_CHUNK_COUNT == 0 )); then
        echo "No chunk files found in $dir; nothing to merge"
        return 0
    fi

    tmp="$dest.tmp.$$"
    head -n 1 "${ordered[0]}" > "$tmp"
    for f in "${ordered[@]}"; do
        tail -n +2 "$f" >> "$tmp"
    done
    mv -f -- "$tmp" "$dest"
}

# report_stats FILE
#
# Print the number of data rows in FILE (every line except the header) and how
# many of them have a fifth comma-separated field equal to "true", with the
# percentage truncated to two decimals. Prints nothing when FILE has no data
# rows. Uses shell integer arithmetic, so `bc` is not required.
report_stats() {
    local file=$1
    local total confirmed pct_x100

    total=$(( $(wc -l < "$file") - 1 ))
    confirmed=$(awk -F, 'NR > 1 && $5 == "true" { n++ } END { print n + 0 }' "$file")

    if (( total > 0 )); then
        # Percentage scaled by 100 so two decimals survive integer division.
        pct_x100=$(( confirmed * 10000 / total ))
        echo "Total slots: $total"
        printf 'Confirmed (delay<=1): %d (%d.%02d%%)\n' \
            "$confirmed" "$(( pct_x100 / 100 ))" "$(( pct_x100 % 100 ))"
    fi
}

merge_chunks "$OUTPUT_DIR" "$MERGED"

# Only report on a file produced by this merge, never on a stale one.
if (( MERGED_CHUNK_COUNT > 0 )); then
    report_stats "$MERGED"
fi

0 commit comments

Comments
 (0)