-
Notifications
You must be signed in to change notification settings - Fork 4
198 lines (178 loc) · 7.08 KB
/
Copy pathcpu-profile.yml
File metadata and controls
198 lines (178 loc) · 7.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
name: CPU profile capture

# One-shot V8 CPU profile capture for the SUT (the Etherpad server process)
# during a scaling-dive sweep. Used to answer "where do CPU cycles go at the
# cliff?" (#7756). Profiling adds overhead (~10-30%), so the sweep report
# produced here should not be compared directly against the no-profile dive
# numbers; the .cpuprofile is the deliverable, not the report.

# Manual trigger only. None of the inputs is marked required, so each one
# falls back to its default when left blank.
on:
  workflow_dispatch:
    inputs:
      # Git ref of ether/etherpad that gets checked out and profiled.
      core_ref:
        description: 'ether/etherpad ref to test against'
        default: 'develop'
        type: string
      # Passed verbatim to the load tester's --sweep option.
      sweep:
        description: 'sweep spec to drive load while profiling'
        default: 'authors=100..400:step=50:dwell=10s:warmup=3s'
        type: string
      # Selects which extra flag (if any) is injected into settings.json.
      settings_lever:
        description: 'which dive lever to apply to settings (or "none")'
        default: 'none'
        type: choice
        options:
          - none
          - new-changes-batch
          - engine-flush-defer

# The job only reads repository contents; no write scopes are granted.
permissions:
  contents: read
jobs:
  # Single job: build the load tester, boot Etherpad under --cpu-prof, drive
  # the sweep against it, stop the server so Node flushes the profile, then
  # upload the profile and the sweep report as artifacts.
  capture:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    env:
      PNPM_HOME: /home/runner/setup-pnpm/node_modules/.bin
    steps:
      - name: Checkout etherpad-load-test
        uses: actions/checkout@v7
        with:
          path: ./loadtest

      - name: Checkout etherpad core (${{ inputs.core_ref }})
        uses: actions/checkout@v7
        with:
          repository: ether/etherpad
          ref: ${{ inputs.core_ref }}
          path: ./etherpad

      - uses: actions/setup-node@v4
        with:
          node-version: 25

      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 10.33.2
          run_install: false

      # Export the store location so the cache step below can key on it.
      - name: Get pnpm store directory
        shell: bash
        run: echo "STORE_PATH=$(pnpm store path --silent)" >> "$GITHUB_ENV"

      - name: Setup pnpm cache
        uses: actions/cache@v6
        with:
          path: ${{ env.STORE_PATH }}
          key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
          restore-keys: |
            ${{ runner.os }}-pnpm-store-

      # The global link is what puts `etherpad-loadtest` on PATH for the
      # "Run sweep" step.
      - name: Build and link etherpad-load-test
        run: |
          cd ./loadtest
          pnpm install --frozen-lockfile
          pnpm run build
          pnpm link --global

      - name: Install Etherpad core dependencies
        run: |
          cd ./etherpad
          pnpm install --frozen-lockfile

      # Derive settings.json from the template: raise the import/export and
      # "points" rate limits, switch on loadTest and scalingDiveMetrics, then
      # append the flag for the selected lever (if any).
      - name: Patch settings.json
        working-directory: ./etherpad
        env:
          # Passed through the environment instead of being spliced into the
          # script text, so the value can never be parsed as shell syntax.
          SETTINGS_LEVER: ${{ inputs.settings_lever }}
        run: |
          sed -e '/^ *"importExportRateLimiting":/,/^ *\}/ s/"max":.*/"max": 100000000/' -i settings.json.template
          sed -e '
            s!"loadTest":[^,]*!"loadTest": true!
            s!"points":[^,]*!"points": 1000000!
          ' settings.json.template > settings.json
          sed -i '/"loadTest": true,/a\ "scalingDiveMetrics": true,' settings.json
          case "$SETTINGS_LEVER" in
            none)
              echo "no lever applied — baseline"
              ;;
            new-changes-batch)
              sed -i '/"loadTest": true,/a\ "newChangesBatch": true,' settings.json
              grep newChangesBatch settings.json
              ;;
            engine-flush-defer)
              sed -i '/"loadTest": true,/a\ "engineFlushDefer": true,' settings.json
              grep engineFlushDefer settings.json
              ;;
            *)
              echo "unknown lever"
              exit 1
              ;;
          esac

      - name: Start Etherpad with --cpu-prof (bypassing pnpm/cross-env wrappers)
        working-directory: ./etherpad
        run: |
          mkdir -p /tmp/cpuprof
          # Previous attempt (PR #110) wrapped `pnpm run prod` with NODE_OPTIONS=--cpu-prof.
          # That produced 5 cpuprofile files — one for each Node in the chain
          # (pnpm, cross-env wrapper, tsx) — and the SUT process was the
          # newest match for `node.*server\.ts` (tsx loader) but came back
          # 99.8% idle while another process pegged CPU. The wrapper chain
          # made it impossible to know which file held the actual server
          # samples.
          #
          # Fix: invoke node DIRECTLY (same command the prod script runs,
          # minus pnpm + cross-env), so there's exactly one Node process
          # under --cpu-prof and it IS the SUT. NODE_ENV is set inline
          # instead of via cross-env.
          cd src
          NODE_ENV=production node \
            --cpu-prof --cpu-prof-dir=/tmp/cpuprof \
            --require tsx/cjs \
            node/server.ts >../ep.log 2>&1 &
          SUT_PID=$!
          # Persist the pid so the stop step (a separate shell) can signal it.
          echo "$SUT_PID" > /tmp/cpuprof/sut.pid
          echo "SUT pid: $SUT_PID"
          cd ..
          # Poll for up to ~90s until the HTTP endpoint answers.
          for i in $(seq 1 90); do
            curl -sf http://127.0.0.1:9001/ >/dev/null && break
            # Bail out early if the SUT died during boot.
            kill -0 "$SUT_PID" 2>/dev/null || { echo "SUT exited during boot"; tail -n 100 ep.log; exit 1; }
            sleep 1
          done
          curl -sf http://127.0.0.1:9001/ >/dev/null || { echo "Etherpad failed to start"; tail -n 100 ep.log; exit 1; }
          echo "Etherpad up; SUT pid $SUT_PID under --cpu-prof"

      - name: Run sweep
        working-directory: ./loadtest
        env:
          # Free-form user inputs reach the script only as environment
          # variables; quotes or $(...) inside them stay literal data.
          SWEEP: ${{ inputs.sweep }}
          CORE_REF: ${{ inputs.core_ref }}
        run: |
          mkdir -p ./out
          # Slashes in the ref (e.g. feature/foo) are replaced with dashes
          # for the run id.
          etherpad-loadtest \
            http://127.0.0.1:9001 \
            --sweep "$SWEEP" \
            --report ./out \
            --run-id "cpu-profile-${CORE_REF//\//-}" \
            --force

      - name: Stop Etherpad cleanly (flushes the cpuprofile to disk)
        if: always()
        run: |
          # SIGTERM the SUT pid we recorded at boot so Node writes the
          # .cpuprofile during clean shutdown. SIGKILL would lose it.
          SUT_PID=$(cat /tmp/cpuprof/sut.pid 2>/dev/null || true)
          if [ -n "$SUT_PID" ] && kill -0 "$SUT_PID" 2>/dev/null; then
            echo "SIGTERM SUT pid $SUT_PID"
            # `|| true`: the process may exit between the liveness check and
            # the signal; that must not abort this cleanup step.
            kill -SIGTERM "$SUT_PID" || true
            # Wait (up to 30s) for the process itself to exit. The profile
            # is written during shutdown, so the mere appearance of the file
            # does not mean it is complete.
            for i in $(seq 1 30); do
              kill -0 "$SUT_PID" 2>/dev/null || break
              sleep 1
            done
          else
            echo "SUT pid $SUT_PID not alive; trying pkill fallback"
            pkill -SIGTERM -f 'node.*server\.ts' || true
          fi
          # Give Node up to 20s to write the profile.
          for i in $(seq 1 20); do
            ls /tmp/cpuprof/*.cpuprofile 2>/dev/null && break
            sleep 1
          done
          echo '--- /tmp/cpuprof contents ---'
          ls -la /tmp/cpuprof || true
          # Surface the per-file sizes so the analyst can spot the SUT
          # quickly: the longest-running process produces the largest
          # cpuprofile (samples accumulate over time).
          for f in /tmp/cpuprof/*.cpuprofile; do
            [ -f "$f" ] || continue
            echo "  $(stat -c '%s bytes' "$f")  $(basename "$f")"
          done
          echo '--- sut.pid ---'
          cat /tmp/cpuprof/sut.pid 2>/dev/null || true
          echo '--- ep.log tail ---'
          tail -n 80 ./etherpad/ep.log || true

      # Runs even after a failed sweep; a missing profile is a hard error
      # because the profile is the whole point of this workflow.
      - name: Upload CPU profile
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: cpuprofile-${{ inputs.settings_lever }}
          path: /tmp/cpuprof/*.cpuprofile
          if-no-files-found: error

      # The report is secondary, so a missing one only warns.
      - name: Upload sweep report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: cpuprofile-report-${{ inputs.settings_lever }}
          path: loadtest/out/
          if-no-files-found: warn