Skip to content

Commit d5e90ed

Browse files
author
Alexandra Pavlyshina
committed
measure-evaluate: bundle performance indexes for production-scale installs
1 parent ba68086 commit d5e90ed

2 files changed

Lines changed: 86 additions & 11 deletions

File tree

aidbox-custom-operations/measure-evaluate/setup.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -174,18 +174,16 @@ def execute_sql_file(filepath, label):
174174
if not os.path.exists(filepath):
175175
print(f" SKIP {label} — not found")
176176
return
177-
178177
with open(filepath) as f:
179178
content = f.read()
180-
181-
statements = [s.strip() for s in content.split(";") if s.strip()]
182-
for stmt in statements:
183-
try:
184-
run_sql(stmt)
185-
except Exception as e:
186-
print(f" FAIL {label}: {str(e)[:80]}")
187-
return
188-
print(f" OK {label}{len(statements)} statements")
179+
try:
180+
# Aidbox $sql wraps the whole call in one transaction — a mid-file
181+
# error rolls back every preceding statement, so this is safe to retry.
182+
run_sql(content)
183+
# Rough count of ';' characters for the log line — not used for execution, just diagnostics.
184+
print(f" OK {label}{content.count(';')} SQL statements")
185+
except Exception as e:
186+
print(f" FAIL {label}: {str(e)[:200]}")
189187

190188

191189
def main():
@@ -230,11 +228,12 @@ def main():
230228
print(" FAIL — Aidbox not responding. Is it running and activated?")
231229
sys.exit(1)
232230

233-
# Create shared views + concepts table + shared exclusion functions
231+
# Create shared views + concepts table + shared exclusion functions + indexes
234232
print("[2/5] Creating shared SQL infrastructure...")
235233
execute_sql_file(os.path.join(SCRIPT_DIR, "sql", "00-terminology.sql"), "Concepts table")
236234
execute_sql_file(os.path.join(SCRIPT_DIR, "sql", "01-views.sql"), "Shared views")
237235
execute_sql_file(os.path.join(SCRIPT_DIR, "sql", "02-shared-exclusions.sql"), "Shared exclusions")
236+
execute_sql_file(os.path.join(SCRIPT_DIR, "sql", "03-performance.sql"), "Performance indexes")
238237

239238
# Create stubs
240239
print("[3/5] Creating stub resources...")
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
-- Performance Optimizations for Measure SQL
2+
-- Run AFTER 01-views.sql and 02-shared-exclusions.sql
3+
-- Safe to re-run (all CREATE INDEX IF NOT EXISTS, ANALYZE is idempotent)
4+
--
5+
-- Contains:
6+
-- 1. Btree indexes on FHIR resource JSONB paths (speeds up flat view scans)
7+
-- 2. ANALYZE to update planner statistics
8+
--
9+
-- Note on date-range indexes:
10+
-- We do NOT index timestamp-cast expressions like ((resource->'period'->>'start')::timestamptz),
11+
-- because Postgres rejects them ("functions in index expression must be marked IMMUTABLE":
12+
-- text->timestamptz casts depend on the DateStyle and TimeZone GUCs, hence STABLE not IMMUTABLE).
13+
-- In practice this does not matter for single-patient or small-cohort evaluation:
14+
-- once the subject indexes below limit the row set to one patient, an in-memory
15+
-- filter on dates is cheap. Date-range scans on the full table remain unindexed —
16+
-- for population-level evaluation at very large scale, plan a follow-up pass with
17+
-- an IMMUTABLE wrapper function that pins the time zone (to_timestamp(text, 'YYYY-MM-DD"T"HH24:MI:SS') on its own is only STABLE) or a maintained timestamp column.
18+
19+
-- ============================================================
20+
-- 1. Indexes on FHIR resource tables
21+
-- These speed up the flat views by allowing index scans on JSONB paths.
22+
-- Named with ix_ prefix to distinguish from Aidbox's own indexes.
23+
-- ============================================================
24+
25+
-- Patient: birthDate (age calculations), gender
26+
CREATE INDEX IF NOT EXISTS ix_patient_birthdate ON patient ((resource->>'birthDate'));
27+
CREATE INDEX IF NOT EXISTS ix_patient_gender ON patient ((resource->>'gender'));
28+
29+
-- status columns are deliberately NOT indexed across all resources: the column
30+
-- has 3–5 distinct values in real data with one value (finished / final /
31+
-- completed / active) covering 90 %+ of rows, so the planner will prefer a
32+
-- sequential scan over a low-selectivity btree.
33+
34+
-- Encounter: subject, type coding
35+
CREATE INDEX IF NOT EXISTS ix_encounter_subject ON encounter ((resource->'subject'->>'id'));
36+
CREATE INDEX IF NOT EXISTS ix_encounter_type_code ON encounter ((resource->'type'->0->'coding'->0->>'system'), (resource->'type'->0->'coding'->0->>'code'));
37+
38+
-- Observation: subject, code, value code
39+
CREATE INDEX IF NOT EXISTS ix_observation_subject ON observation ((resource->'subject'->>'id'));
40+
CREATE INDEX IF NOT EXISTS ix_observation_code ON observation ((resource->'code'->'coding'->0->>'system'), (resource->'code'->'coding'->0->>'code'));
41+
CREATE INDEX IF NOT EXISTS ix_observation_value_code ON observation ((resource->'value'->'CodeableConcept'->'coding'->0->>'code')) WHERE resource->'value'->'CodeableConcept' IS NOT NULL;
42+
43+
-- Condition: subject, code
44+
CREATE INDEX IF NOT EXISTS ix_condition_subject ON condition ((resource->'subject'->>'id'));
45+
CREATE INDEX IF NOT EXISTS ix_condition_code ON condition ((resource->'code'->'coding'->0->>'system'), (resource->'code'->'coding'->0->>'code'));
46+
47+
-- Procedure: subject, code
48+
CREATE INDEX IF NOT EXISTS ix_procedure_subject ON procedure ((resource->'subject'->>'id'));
49+
CREATE INDEX IF NOT EXISTS ix_procedure_code ON procedure ((resource->'code'->'coding'->0->>'system'), (resource->'code'->'coding'->0->>'code'));
50+
51+
-- ServiceRequest: subject, code
52+
CREATE INDEX IF NOT EXISTS ix_servicerequest_subject ON servicerequest ((resource->'subject'->>'id'));
53+
CREATE INDEX IF NOT EXISTS ix_servicerequest_code ON servicerequest ((resource->'code'->'coding'->0->>'system'), (resource->'code'->'coding'->0->>'code'));
54+
55+
-- MedicationRequest: subject, medication code
56+
CREATE INDEX IF NOT EXISTS ix_medrq_subject ON medicationrequest ((resource->'subject'->>'id'));
57+
CREATE INDEX IF NOT EXISTS ix_medrq_med_code ON medicationrequest ((resource->'medication'->'CodeableConcept'->'coding'->0->>'system'), (resource->'medication'->'CodeableConcept'->'coding'->0->>'code'));
58+
59+
-- DeviceRequest: subject, code
60+
CREATE INDEX IF NOT EXISTS ix_devicerq_subject ON devicerequest ((resource->'subject'->>'id'));
61+
CREATE INDEX IF NOT EXISTS ix_devicerq_code ON devicerequest ((resource->'code'->'CodeableConcept'->'coding'->0->>'system'), (resource->'code'->'CodeableConcept'->'coding'->0->>'code'));
62+
63+
-- ============================================================
64+
-- 2. Update planner statistics
65+
-- On a fresh sample these complete in seconds. On a production-scale install
66+
-- with millions of rows each ANALYZE may take minutes — budget accordingly.
67+
-- ============================================================
68+
ANALYZE concepts;
69+
ANALYZE patient;
70+
ANALYZE encounter;
71+
ANALYZE observation;
72+
ANALYZE condition;
73+
ANALYZE procedure;
74+
ANALYZE servicerequest;
75+
ANALYZE medicationrequest;
76+
ANALYZE devicerequest;

0 commit comments

Comments
 (0)