|
1 | 1 | #!/usr/bin/env python3 |
2 | | -"""Cohort SQL perf benchmark for 12 measures on AME Aidbox. |
| 2 | +"""Aggregate SQL perf benchmark for 12 measures on AME Aidbox. |
3 | 3 |
|
4 | | -Mirrors aidbox-cql-poc/experiments/dba-optimization/measure_perf.py, but reads |
5 | | -measure SQL from this repo's sql/measures/cmsXXX/02-cmsXXX-measure.sql layout |
6 | | -and targets the AME Aidbox at http://localhost:8888 by default. |
| 4 | +Runs each measure's aggregate SQL (SELECT COUNT(*) ...) — the same query |
| 5 | +that $evaluate-measure summary mode executes in production. |
7 | 6 |
|
8 | 7 | Usage: |
9 | 8 | python3 tools/measure_perf.py --label before |
@@ -42,48 +41,55 @@ def run_sql(query: str, base_url: str, user: str, password: str, timeout: int = |
42 | 41 | return json.loads(resp.read()) |
43 | 42 |
|
44 | 43 |
|
def build_aggregate_sql(measure_id: str) -> str:
    """Return the aggregate SQL that is benchmarked for ``measure_id``.

    The intent is to time the same query that ``$evaluate-measure`` summary
    mode runs, rather than a per-patient cohort query.

    Args:
        measure_id: Measure directory / file stem, e.g. ``"cms165"``.

    Returns:
        The SQL text to send to the server. For ``"cms165"`` this is a call to
        the ``cms165_aggregate`` database function with the measurement period
        bounds; for every other measure it is the content of
        ``sql/measures/<measure_id>/02-<measure_id>-measure.sql`` (relative to
        ``ROOT``) after ``em.parameterize_sql`` has been applied with
        ``PERIOD_START`` and ``PERIOD_END``.

    Raises:
        OSError: If the measure's SQL file cannot be read.
    """
    # CMS165: PL/pgSQL wrapper with SET LOCAL enable_nestloop = off
    if measure_id == "cms165":
        return (f"SELECT * FROM cms165_aggregate("
                f"'{PERIOD_START}T00:00:00Z'::timestamptz,"
                f"'{PERIOD_END}T23:59:59Z'::timestamptz)")

    # All others: raw aggregate SQL from file (SELECT COUNT(*) ...)
    sql_path = ROOT / "sql" / "measures" / measure_id / f"02-{measure_id}-measure.sql"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    sql = sql_path.read_text(encoding="utf-8")
    return em.parameterize_sql(sql, PERIOD_START, PERIOD_END)
50 | 55 |
|
51 | 56 |
|
def measure_one(measure_id: str, base_url: str, user: str, password: str,
                warm: bool = True, iterations: int = 1) -> dict:
    """Time the aggregate SQL of one measure and report timing plus counts.

    The query from ``build_aggregate_sql(measure_id)`` is optionally run once
    untimed (warmup) and then ``iterations`` times with timing.

    Args:
        measure_id: Measure identifier passed to ``build_aggregate_sql``.
        base_url: Server base URL, forwarded to ``run_sql``.
        user: User name, forwarded to ``run_sql``.
        password: Password, forwarded to ``run_sql``.
        warm: When true, run the query once before timing starts.
        iterations: Number of timed runs; must be at least 1.

    Returns:
        On success a dict with ``samples_ms`` (sorted, rounded to 0.1 ms),
        ``median_ms`` (the upper median when the sample count is even),
        ``min_ms``, ``max_ms`` and the population counts ``ip``, ``den``,
        ``exc``, ``num`` taken from the first row of the last response.
        On any failure a dict with a single ``error`` key; this function does
        not raise for query or HTTP errors.
    """
    # Without at least one timed run there is nothing to take a median of.
    if iterations < 1:
        return {"error": f"iterations must be >= 1, got {iterations}"}

    sql = build_aggregate_sql(measure_id)

    if warm:
        try:
            run_sql(sql, base_url, user, password)
        except Exception as e:
            return {"error": f"warmup failed: {e}"}

    samples: list[float] = []
    last_rows = None
    for _ in range(iterations):
        # perf_counter is monotonic and high resolution, so a sample cannot be
        # skewed by system clock adjustments during the run.
        t0 = time.perf_counter()
        try:
            rows = run_sql(sql, base_url, user, password)
        except urllib.error.HTTPError as e:
            return {"error": f"HTTP {e.code}: {e.read()[:200].decode(errors='replace')}"}
        except Exception as e:
            return {"error": str(e)[:200]}
        samples.append((time.perf_counter() - t0) * 1000)
        last_rows = rows

    # The aggregate query is expected to yield one row of counts. Tolerate an
    # empty result or a response that is not a list of objects instead of
    # crashing after the timings have already been collected.
    first = last_rows[0] if isinstance(last_rows, list) and last_rows else None
    counts = first if isinstance(first, dict) else {}

    # ``or 0`` maps both a missing column and a null value to 0.
    n_ip = counts.get("initial_population") or 0
    n_den = counts.get("denominator") or 0
    n_exc = counts.get("denominator_exclusion") or 0
    n_num = counts.get("numerator") or 0

    samples.sort()
    median = samples[len(samples) // 2]
    return {
        "samples_ms": [round(s, 1) for s in samples],
        "median_ms": round(median, 1),
        "min_ms": round(samples[0], 1),
        "max_ms": round(samples[-1], 1),
        "ip": n_ip, "den": n_den, "exc": n_exc, "num": n_num,
    }
89 | 95 |
|
@@ -117,7 +123,7 @@ def main(): |
117 | 123 | print(f" ERROR: {result['error']}") |
118 | 124 | else: |
119 | 125 | spread = f"({result['min_ms']:.0f}–{result['max_ms']:.0f})" |
120 | | - print(f" median={result['median_ms']:>7.1f}ms {spread:>14s} rows={result['rows']:6d} " |
| 126 | + print(f" median={result['median_ms']:>7.1f}ms {spread:>14s} " |
121 | 127 | f"ip={result['ip']}/den={result['den']}/exc={result['exc']}/num={result['num']}") |
122 | 128 |
|
123 | 129 | out_path = OUT_DIR / f"perf-{args.label}.json" |
|
0 commit comments