Skip to content

Commit fbbb9b2

Browse files
author
Alexandra Pavlyshina
committed
scale_test: parallel loading (3x faster) + resource count verification
1 parent f98886c commit fbbb9b2

1 file changed

Lines changed: 49 additions & 9 deletions

File tree

aidbox-custom-operations/measure-evaluate/tools/scale_test.py

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import argparse
2424
import base64
25+
import concurrent.futures
2526
import copy
2627
import json
2728
import os
@@ -146,6 +147,13 @@ def load_copy(measure_id, copy_index):
146147
with open(path) as f:
147148
bundle = json.load(f)
148149
bundle = make_copy(bundle, copy_index)
150+
for attempt in range(3):
151+
try:
152+
http("POST", "/fhir", bundle, timeout=180, as_json=False)
153+
return len(bundle.get("entry", []))
154+
except urllib.error.HTTPError:
155+
if attempt < 2:
156+
time.sleep(0.5)
149157
http("POST", "/fhir", bundle, timeout=180, as_json=False)
150158
return len(bundle.get("entry", []))
151159

@@ -275,25 +283,57 @@ def main():
275283
print(f" Measures: {', '.join(measures)}")
276284
print()
277285

278-
# Phase 1 — load multiplied clinical data
286+
# Phase 1 — load multiplied clinical data (parallel across measures)
279287
print("[1/2] Loading multiplied clinical data...")
280288
total = 0
281289
t0 = time.time()
282-
for m in measures:
283-
per_measure = 0
290+
results = {}
291+
292+
def load_measure_copies(m):
293+
n = 0
284294
for i in range(1, args.multiplier + 1):
285295
try:
286-
per_measure += load_copy(m, i)
287-
except urllib.error.HTTPError as e:
288-
print(f" FAIL {m} copy {i}: HTTP {e.code}")
289-
break
296+
n += load_copy(m, i)
290297
except Exception as e:
291298
print(f" FAIL {m} copy {i}: {str(e)[:80]}")
292299
break
293-
print(f" {m}: {per_measure} resources across {args.multiplier} copies")
294-
total += per_measure
300+
return m, n
301+
302+
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as ex:
303+
for m, n in ex.map(load_measure_copies, measures):
304+
results[m] = n
305+
total += n
306+
print(f" {m}: {n} resources across {args.multiplier} copies")
295307
print(f" Total: {total} resources in {time.time() - t0:.1f}s")
296308
print(f" Patient count now: {patient_count()}")
309+
310+
# Verify: per resource type, the DB row count must be at least (distinct ids in the source bundles) × (1 + multiplier)
311+
print(" Verifying resource counts...")
312+
by_type = {}
313+
for m in measures:
314+
path = os.path.join(DATA_DIR, f"{m}-clinical-data.json")
315+
if not os.path.exists(path):
316+
continue
317+
with open(path) as f:
318+
bundle = json.load(f)
319+
for entry in bundle.get("entry", []):
320+
r = entry.get("resource", {})
321+
rt = r.get("resourceType")
322+
rid = r.get("id")
323+
if rt and rid:
324+
by_type.setdefault(rt, set()).add(rid)
325+
ok = True
326+
for rt, ids in sorted(by_type.items()):
327+
expected = len(ids) * (1 + args.multiplier)
328+
try:
329+
actual = http("POST", "/$sql", [f"SELECT count(*) AS n FROM {rt.lower()}"])[0]["n"]
330+
except Exception:
331+
actual = 0
332+
if actual < expected:
333+
print(f" WARN {rt}: expected {expected}, got {actual} (missing {expected - actual})")
334+
ok = False
335+
if ok:
336+
print(" OK — all resource counts match")
297337
print()
298338

299339
# Phase 2 — measure timings

0 commit comments

Comments
 (0)