Skip to content

Commit a5c370b

Browse files
author
Alexandra Pavlyshina
committed
measure-evaluate: handle FHIR partial dates in flat views
1 parent 7709016 commit a5c370b

2 files changed

Lines changed: 313 additions & 19 deletions

File tree

aidbox-custom-operations/measure-evaluate/sql/01-views.sql

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,28 @@
77
-- - polymorphic fields: resource->'performed'->'dateTime' (not resource->>'performedDateTime')
88
-- - polymorphic fields: resource->'performed'->'Period'->>'start' (not resource->'performedPeriod'->>'start')
99

10+
-- ============================================================
11+
-- Partial-date helper
12+
-- FHIR R4 allows `dateTime` with partial precision: "2015" (year) or "2015-10"
13+
-- (year-month). A naïve cast `'2015-10'::timestamptz` raises
14+
-- "invalid input syntax for type timestamp with time zone".
15+
-- This helper pads partial dates to the start of the implied period:
16+
-- "2015" -> 2015-01-01T00:00:00Z
17+
-- "2015-10" -> 2015-10-01T00:00:00Z
18+
-- full date -> as-is
19+
-- Anything that doesn't look like a FHIR date returns NULL so that downstream
20+
-- WHERE filters exclude the row instead of erroring.
21+
-- ============================================================
22+
CREATE OR REPLACE FUNCTION parse_fhir_datetime(s text) RETURNS timestamptz AS $$
    -- Converts a FHIR date/dateTime string to timestamptz.
    --   NULL input            -> NULL
    --   full date / dateTime  -> cast as-is (a value without an offset is
    --                            interpreted in the session TimeZone, which is
    --                            why the function is STABLE, not IMMUTABLE)
    --   "YYYY-MM"             -> first day of that month, midnight UTC
    --   "YYYY"                -> 1 January of that year, midnight UTC
    --   anything else         -> NULL
    -- Every pattern is anchored at both ends and range-checks month, day, time
    -- and offset, so strings such as '2015-13', '0000' or '2015-10-01garbage'
    -- yield NULL instead of raising inside the cast.
    -- Limitation: days that pass the 01-31 check but do not exist in the given
    -- month (e.g. '2015-02-30') still raise in the cast.
    SELECT CASE
        WHEN s IS NULL THEN NULL
        WHEN s ~ '^(?!0000)\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])([T ]([01]\d|2[0-3]):[0-5]\d(:([0-5]\d|60)(\.\d+)?)?(Z|[+-](0\d|1[0-4])(:?[0-5]\d)?)?)?$'
            THEN s::timestamptz
        WHEN s ~ '^(?!0000)\d{4}-(0[1-9]|1[0-2])$'
            THEN (s || '-01T00:00:00Z')::timestamptz
        WHEN s ~ '^(?!0000)\d{4}$'
            THEN (s || '-01-01T00:00:00Z')::timestamptz
        ELSE NULL
    END
$$ LANGUAGE sql STABLE;
31+
1032
-- ============================================================
1133
-- Patient
1234
-- ============================================================
@@ -51,8 +73,8 @@ SELECT
5173
r.resource->>'status' AS status,
5274
r.resource->'type'->0->'coding'->0->>'system' AS type_system,
5375
r.resource->'type'->0->'coding'->0->>'code' AS type_code,
54-
(r.resource->'period'->>'start')::timestamptz AS period_start,
55-
(r.resource->'period'->>'end')::timestamptz AS period_end
76+
parse_fhir_datetime(r.resource->'period'->>'start') AS period_start,
77+
parse_fhir_datetime(r.resource->'period'->>'end') AS period_end
5678
FROM encounter r;
5779

5880
-- ============================================================
@@ -68,12 +90,12 @@ SELECT
6890
r.resource->'code'->'coding'->0->>'code' AS code,
6991
-- Aidbox polymorphic: performed -> {dateTime: '...'} or {Period: {start, end}}
7092
COALESCE(
71-
(r.resource->'performed'->>'dateTime')::timestamptz,
72-
(r.resource->'performed'->'Period'->>'start')::timestamptz
93+
parse_fhir_datetime(r.resource->'performed'->>'dateTime'),
94+
parse_fhir_datetime(r.resource->'performed'->'Period'->>'start')
7395
) AS performed_start,
7496
COALESCE(
75-
(r.resource->'performed'->>'dateTime')::timestamptz,
76-
(r.resource->'performed'->'Period'->>'end')::timestamptz
97+
parse_fhir_datetime(r.resource->'performed'->>'dateTime'),
98+
parse_fhir_datetime(r.resource->'performed'->'Period'->>'end')
7799
) AS performed_end
78100
FROM procedure r;
79101

@@ -91,12 +113,12 @@ SELECT
91113
r.resource->'category'->0->'coding'->0->>'code' AS category_code,
92114
-- Aidbox polymorphic: effective -> {dateTime: '...'} or {Period: {start, end}}
93115
COALESCE(
94-
(r.resource->'effective'->>'dateTime')::timestamptz,
95-
(r.resource->'effective'->'Period'->>'start')::timestamptz
116+
parse_fhir_datetime(r.resource->'effective'->>'dateTime'),
117+
parse_fhir_datetime(r.resource->'effective'->'Period'->>'start')
96118
) AS effective_start,
97119
COALESCE(
98-
(r.resource->'effective'->>'dateTime')::timestamptz,
99-
(r.resource->'effective'->'Period'->>'end')::timestamptz
120+
parse_fhir_datetime(r.resource->'effective'->>'dateTime'),
121+
parse_fhir_datetime(r.resource->'effective'->'Period'->>'end')
100122
) AS effective_end,
101123
-- Aidbox polymorphic: value -> {CodeableConcept: {coding: [...]}}
102124
r.resource->'value'->'CodeableConcept'->'coding'->0->>'system' AS value_system,
@@ -118,12 +140,12 @@ SELECT
118140
r.resource->'verificationStatus'->'coding'->0->>'code' AS verification_status,
119141
-- Aidbox polymorphic: onset -> {dateTime: '...'} or {Period: {start}}
120142
COALESCE(
121-
(r.resource->'onset'->>'dateTime')::timestamptz,
122-
(r.resource->'onset'->'Period'->>'start')::timestamptz
143+
parse_fhir_datetime(r.resource->'onset'->>'dateTime'),
144+
parse_fhir_datetime(r.resource->'onset'->'Period'->>'start')
123145
) AS onset_date,
124146
COALESCE(
125-
(r.resource->'abatement'->>'dateTime')::timestamptz,
126-
(r.resource->'abatement'->'Period'->>'end')::timestamptz
147+
parse_fhir_datetime(r.resource->'abatement'->>'dateTime'),
148+
parse_fhir_datetime(r.resource->'abatement'->'Period'->>'end')
127149
) AS abatement_date,
128150
r.resource->'category'->0->'coding'->0->>'code' AS category_code
129151
FROM condition r;
@@ -140,7 +162,7 @@ SELECT
140162
r.resource->>'intent' AS intent,
141163
r.resource->'code'->'coding'->0->>'system' AS code_system,
142164
r.resource->'code'->'coding'->0->>'code' AS code,
143-
(r.resource->>'authoredOn')::timestamptz AS authored_on
165+
parse_fhir_datetime(r.resource->>'authoredOn') AS authored_on
144166
FROM servicerequest r;
145167

146168
-- ============================================================
@@ -162,9 +184,9 @@ SELECT
162184
r.resource->'medication'->'CodeableConcept'->'coding'->0->>'code',
163185
m.resource->'code'->'coding'->0->>'code'
164186
) AS med_code,
165-
(r.resource->'dispenseRequest'->'validityPeriod'->>'start')::timestamptz AS validity_start,
166-
(r.resource->'dispenseRequest'->'validityPeriod'->>'end')::timestamptz AS validity_end,
167-
(r.resource->>'authoredOn')::timestamptz AS authored_on
187+
parse_fhir_datetime(r.resource->'dispenseRequest'->'validityPeriod'->>'start') AS validity_start,
188+
parse_fhir_datetime(r.resource->'dispenseRequest'->'validityPeriod'->>'end') AS validity_end,
189+
parse_fhir_datetime(r.resource->>'authoredOn') AS authored_on
168190
FROM medicationrequest r
169191
LEFT JOIN medication m ON m.id = r.resource->'medication'->'Reference'->>'id';
170192

@@ -181,5 +203,5 @@ SELECT
181203
-- Aidbox polymorphic: code -> {CodeableConcept: {coding: [...]}}
182204
r.resource->'code'->'CodeableConcept'->'coding'->0->>'system' AS code_system,
183205
r.resource->'code'->'CodeableConcept'->'coding'->0->>'code' AS code,
184-
(r.resource->>'authoredOn')::timestamptz AS authored_on
206+
parse_fhir_datetime(r.resource->>'authoredOn') AS authored_on
185207
FROM devicerequest r;
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
#!/usr/bin/env python3
2+
"""
3+
inject_partial_dates.py — maintainer test for FHIR partial-date handling.
4+
5+
The FHIR R4 spec allows `dateTime` with partial precision: "2015" (year only)
6+
or "2015-10" (year-month). Customer EHR systems routinely emit such partial
7+
dates for historical events where exact day isn't known.
8+
9+
The naive `01-views.sql` casts every date string to `timestamptz`, which fails:
10+
11+
ERROR: invalid input syntax for type timestamp with time zone: "2015-10"
12+
13+
This script reproduces the issue by injecting 9 corrupted patient copies
14+
(3 base patients × 3 partial-date patterns), tagged for safe cleanup. After
15+
loading, run any measure to see the cast failure; then apply the
16+
`parse_fhir_datetime` fix in `sql/01-views.sql` and re-run to verify.
17+
18+
Patterns applied:
19+
pd-ym — encounter.period.start = "2025-10" (year-month)
20+
pd-y — encounter.period.start = "2025" (year only)
21+
pd-cond — condition.onsetDateTime = "2020-08" (year-month on condition)
22+
23+
Usage:
24+
python3 tools/inject_partial_dates.py --base-url http://localhost:6888
25+
python3 tools/inject_partial_dates.py --cleanup --base-url http://localhost:6888
26+
"""
27+
28+
import argparse
29+
import base64
30+
import copy
31+
import json
32+
import os
33+
import sys
34+
import urllib.error
35+
import urllib.request
36+
37+
BASE_URL = "http://localhost:8888"
38+
USER = "root"
39+
PASS = "secret"
40+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
41+
DATA_DIR = os.path.join(SCRIPT_DIR, "..", "data")
42+
43+
TAG = {
44+
"system": "https://aidbox.app/sample/measure-evaluate",
45+
"code": "partial-date-test",
46+
}
47+
48+
PATTERNS = [
49+
{"suffix": "pd-ym", "target": "Encounter", "field": "period.start", "value": "2025-10", "label": "year-month on encounter.period.start"},
50+
{"suffix": "pd-y", "target": "Encounter", "field": "period.start", "value": "2025", "label": "year-only on encounter.period.start"},
51+
{"suffix": "pd-cond", "target": "Condition", "field": "onsetDateTime", "value": "2020-08", "label": "year-month on condition.onsetDateTime"},
52+
]
53+
54+
55+
def auth_header():
    """Return the base64 encoding of "USER:PASS" for an HTTP Basic Authorization header."""
    credentials = "{}:{}".format(USER, PASS)
    encoded = base64.b64encode(credentials.encode())
    return encoded.decode()
57+
58+
59+
def http(method, path, body=None, timeout=180, as_json=True):
    """Send one authenticated HTTP request to BASE_URL + path.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        path: Path (and optional query string) appended to BASE_URL.
        body: Optional JSON-serialisable payload; when given it is sent as
            application/json.
        timeout: Socket timeout in seconds passed to urlopen.
        as_json: When true and the response body is non-empty, return the
            parsed JSON; otherwise return the raw bytes.

    Returns:
        Parsed JSON, or the raw response bytes (possibly empty).

    Raises:
        urllib.error.HTTPError / urllib.error.URLError: propagated unchanged
            from urlopen so callers can decide how to report them.
    """
    url = f"{BASE_URL}{path}"
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(url, method=method, data=data)
    req.add_header("Authorization", f"Basic {auth_header()}")
    if data is not None:
        req.add_header("Content-Type", "application/json")
    # Close the response deterministically instead of leaving the socket to
    # the garbage collector; cleanup loops call this many times in a row.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read()
    if as_json and raw:
        return json.loads(raw)
    return raw
71+
72+
73+
def pick_base_patients():
    """Pick up to 3 patients from cms130-clinical-data.json to use as templates.

    Resources are grouped per patient via their `subject.reference`
    ("Patient/<id>"). Patients that have both Encounter and Condition
    resources are preferred; if fewer than 3 such patients exist, patients
    with at least one Encounter fill the remaining slots.

    Returns:
        A list of at most 3 `(patient_id, bucket)` tuples, where bucket is a
        dict with keys "patient" (bundle entry or None), "encounters",
        "conditions" and "others" (lists of bundle entries).
    """
    path = os.path.join(DATA_DIR, "cms130-clinical-data.json")
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(path, encoding="utf-8") as f:
        bundle = json.load(f)

    def new_bucket():
        return {"patient": None, "encounters": [], "conditions": [], "others": []}

    prefix = "Patient/"
    by_patient = {}
    for entry in bundle.get("entry", []):
        r = entry.get("resource", {})
        rt = r.get("resourceType")
        if rt == "Patient":
            by_patient.setdefault(r["id"], new_bucket())["patient"] = entry
            continue
        # `or ""` also covers an explicit null reference.
        subject = (r.get("subject") or {}).get("reference") or ""
        if not subject.startswith(prefix):
            continue
        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of "Patient/" inside the id.
        pid = subject[len(prefix):]
        bucket = by_patient.setdefault(pid, new_bucket())
        if rt == "Encounter":
            bucket["encounters"].append(entry)
        elif rt == "Condition":
            bucket["conditions"].append(entry)
        else:
            bucket["others"].append(entry)

    chosen = []
    for pid, bucket in by_patient.items():
        if bucket["patient"] and bucket["encounters"] and bucket["conditions"]:
            chosen.append((pid, bucket))
            if len(chosen) == 3:
                return chosen

    # Fallback: relax — accept patients with encounter only
    already = {p for p, _ in chosen}
    for pid, bucket in by_patient.items():
        if bucket["patient"] and bucket["encounters"] and pid not in already:
            chosen.append((pid, bucket))
            if len(chosen) == 3:
                return chosen
    return chosen[:3]
112+
113+
114+
def inject(base_bucket, pattern):
    """Build bundle entries that copy one patient's data with a partial date applied.

    Every resource in the bucket is deep-copied, given the id
    "<old id>-<pattern suffix>", tagged with TAG, and has its references to
    other copied resources rewritten to the new ids. The first resource of
    type `pattern["target"]` that already contains the dotted path
    `pattern["field"]` gets that field overwritten with `pattern["value"]`.

    Args:
        base_bucket: Dict with keys "patient", "encounters", "conditions" and
            "others", as produced by pick_base_patients.
        pattern: Dict with keys "suffix", "target", "field" and "value".

    Returns:
        The list of modified bundle entries, or None if no resource of the
        target type contains the field (nothing was changed).
    """
    suffix = pattern["suffix"]
    target_rt = pattern["target"]
    field_path = pattern["field"].split(".")
    value = pattern["value"]

    originals = (
        [base_bucket["patient"]]
        + base_bucket["encounters"]
        + base_bucket["conditions"]
        + base_bucket["others"]
    )
    entries = [copy.deepcopy(e) for e in originals]

    id_map = {}
    for entry in entries:
        r = entry["resource"]
        old_id = r.get("id")
        if not old_id:
            continue
        new_id = f"{old_id}-{suffix}"
        id_map[old_id] = new_id
        r["id"] = new_id
        meta = r.setdefault("meta", {})
        # Append a copy so the resources do not all alias the module-level
        # TAG dict.
        meta.setdefault("tag", []).append(dict(TAG))
        req = entry.get("request", {})
        if req.get("url", "").startswith(f"{r['resourceType']}/"):
            req["url"] = f"{r['resourceType']}/{new_id}"

    def rewrite_refs(node):
        # Walk the whole resource tree and repoint "<Type>/<id>" references
        # whose id belongs to one of the copied resources.
        if isinstance(node, dict):
            ref = node.get("reference")
            if isinstance(ref, str) and "/" in ref:
                rt, rid = ref.split("/", 1)
                if rid in id_map:
                    node["reference"] = f"{rt}/{id_map[rid]}"
            for v in node.values():
                rewrite_refs(v)
        elif isinstance(node, list):
            for item in node:
                rewrite_refs(item)

    for e in entries:
        rewrite_refs(e["resource"])

    # Apply the partial-date corruption to the first matching resource.
    # A single-segment path (e.g. "onsetDateTime") has no parents, so the
    # walk below leaves `obj` at the resource itself and the same check
    # covers top-level fields.
    *parents, leaf = field_path
    for e in entries:
        r = e["resource"]
        if r.get("resourceType") != target_rt:
            continue
        obj = r
        for key in parents:
            if not isinstance(obj, dict) or key not in obj:
                obj = None
                break
            obj = obj[key]
        if isinstance(obj, dict) and leaf in obj:
            obj[leaf] = value
            return entries

    return None
179+
180+
181+
def cleanup_partial_date_data():
    """Delete every resource carrying the partial-date test TAG.

    For each resource type in a fixed list, repeatedly searches by `_tag`
    (500 per page) and deletes the returned resources with a transaction
    bundle until a search returns fewer than a full page. Errors are
    reported per resource type and do not stop the remaining types
    (best-effort cleanup). Prints progress and a final total.
    """
    # Local import: the file's top-level imports only bring in
    # urllib.error and urllib.request.
    from urllib.parse import urlencode

    print("Cleaning up partial-date test data...")
    page_size = 500
    # The tag token contains ':', '/' and '|', so it must be percent-encoded
    # before being placed in the query string.
    query = urlencode({"_tag": f"{TAG['system']}|{TAG['code']}", "_count": page_size})
    total = 0
    for rt in ["Encounter", "Condition", "Observation", "Procedure",
               "ServiceRequest", "MedicationRequest", "DeviceRequest",
               "Coverage", "Patient", "Practitioner"]:
        deleted = 0
        try:
            while True:
                resp = http("GET", f"/fhir/{rt}?{query}", timeout=60)
                entries = resp.get("entry", []) if isinstance(resp, dict) else []
                # Keep only entries that carry a resource id; anything else
                # cannot be turned into a DELETE request.
                ids = [
                    e["resource"]["id"]
                    for e in entries
                    if isinstance(e.get("resource"), dict) and e["resource"].get("id")
                ]
                if not ids:
                    break
                bundle = {
                    "resourceType": "Bundle", "type": "transaction",
                    "entry": [
                        {"request": {"method": "DELETE", "url": f"{rt}/{rid}"}}
                        for rid in ids
                    ],
                }
                http("POST", "/fhir", bundle, timeout=120, as_json=False)
                deleted += len(ids)
                if len(entries) < page_size:
                    break
            if deleted:
                print(f" Deleted {deleted} {rt}")
                total += deleted
        except urllib.error.HTTPError as e:
            print(f" Skipped {rt}: HTTP {e.code}")
        except Exception as e:
            print(f" Skipped {rt}: {str(e)[:80]}")
    print(f"Cleanup complete: {total} resources removed.")
214+
215+
216+
def main():
    """Command-line entry point.

    With --cleanup, removes all tagged test data and returns. Otherwise
    picks base patients, posts one transaction bundle per
    (patient, pattern) pair, and prints instructions for verifying the
    cast failure. Exits with status 2 when no base patients can be picked
    and with status 1 when at least one bundle failed to load.
    """
    global BASE_URL
    p = argparse.ArgumentParser(description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--base-url", default=BASE_URL)
    p.add_argument("--cleanup", action="store_true")
    args = p.parse_args()
    BASE_URL = args.base_url

    if args.cleanup:
        cleanup_partial_date_data()
        return

    print(f"Injecting partial-date test data on {BASE_URL}")

    base = pick_base_patients()
    if not base:
        print("ERROR: could not pick base patients from cms130-clinical-data.json")
        sys.exit(2)

    print(f"\n[1/2] Picked {len(base)} base patients:")
    for pid, bucket in base:
        print(f" {pid} ({len(bucket['encounters'])} encounter(s), {len(bucket['conditions'])} condition(s))")

    print("\n[2/2] Generating and loading copies with partial dates:")
    loaded = 0
    skipped = 0
    failed = 0
    for pid, bucket in base:
        for pattern in PATTERNS:
            entries = inject(bucket, pattern)
            if entries is None:
                print(f" SKIP {pid}-{pattern['suffix']}: no matching {pattern['target']}")
                skipped += 1
                continue
            bundle_body = {"resourceType": "Bundle", "type": "transaction", "entry": entries}
            try:
                http("POST", "/fhir", bundle_body, timeout=120, as_json=False)
                print(f" load {pid}-{pattern['suffix']}: {pattern['label']}")
                loaded += 1
            except urllib.error.HTTPError as e:
                body = e.read().decode()[:200]
                print(f" FAIL {pid}-{pattern['suffix']}: HTTP {e.code}: {body}")
                failed += 1
            except urllib.error.URLError as e:
                # Server unreachable (connection refused, DNS failure, ...):
                # report it like any other failed load instead of crashing
                # with a traceback.
                print(f" FAIL {pid}-{pattern['suffix']}: {e.reason}")
                failed += 1

    print(f"\nLoaded {loaded} corrupted patient copies (skipped: {skipped}).")
    if failed:
        print(f"{failed} bundle(s) failed to load.")
    print()
    print("Verify the bug:")
    print(" curl -u root:secret -X POST \\")
    print(f" '{BASE_URL}/Measure/$evaluate-measure?measure=cms130&reportType=population&periodStart=2026-01-01&periodEnd=2026-12-31'")
    print()
    print("Expected BEFORE fix: ERROR: invalid input syntax for type timestamp with time zone")
    print("Expected AFTER fix: normal MeasureReport (no error)")
    print()
    print(f"To remove the test data: python3 tools/inject_partial_dates.py --cleanup --base-url {BASE_URL}")

    if failed:
        # Signal the partial failure to shells and CI after the guidance
        # above has been printed.
        sys.exit(1)
269+
270+
271+
if __name__ == "__main__":
272+
main()

0 commit comments

Comments
 (0)