Skip to content

Commit 014f10e

Browse files
committed
Rename toy dataset and fix small things
1 parent e973c1e commit 014f10e

2 files changed

Lines changed: 44 additions & 42 deletions

File tree

filter_engine.py

Lines changed: 44 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def _():
1818

1919
import yaml
2020
from opensyndrome.filter import OSDEngine, load_profile
21-
from opensyndrome.artifacts import get_schema_filepath, download_definitions, get_definition_dir
21+
from opensyndrome.artifacts import get_definition_dir
2222

2323
return (
2424
OSDEngine,
@@ -35,13 +35,19 @@ def _():
3535

3636
@app.cell
3737
def _(go, pl):
38-
def plot_cases(_df_filtered, definitions, date_column="date", date_format="%Y-%m-%d %H:%M:%S"):
38+
def plot_cases(
39+
_df_filtered, definitions, date_column="date", date_format="%Y-%m-%d %H:%M:%S"
40+
):
3941
_definitions_columns_sum = [
4042
pl.col(definition).sum().alias(definition) for definition in definitions
4143
]
4244
_agg_df = (
4345
_df_filtered.with_columns(
44-
pl.col(date_column).str.to_datetime(format=date_format, strict=False).cast(pl.Date).dt.truncate("1mo").alias("_month")
46+
pl.col(date_column)
47+
.str.to_datetime(format=date_format, strict=False)
48+
.cast(pl.Date)
49+
.dt.truncate("1mo")
50+
.alias("_month")
4551
)
4652
.group_by("_month")
4753
.agg(_definitions_columns_sum)
@@ -115,8 +121,8 @@ def _(mo):
115121
@app.cell
116122
def _(Path):
117123
EXAMPLE_DATASETS = {
118-
"Fake dataset - generated with ChatGPT": {
119-
"csv": Path("fake_dataset.csv"),
124+
"Toy dataset": {
125+
"csv": Path("toy_dataset.csv"),
120126
"mapping": Path("mapping.yaml"),
121127
"date_column": "recording_ts",
122128
},
@@ -160,11 +166,7 @@ def _(EXAMPLE_DATASETS, data_source, example_picker, pl, sample_file):
160166
else None
161167
)
162168
else:
163-
df_selected = (
164-
pl.read_csv(sample_file.contents())
165-
if sample_file.value
166-
else None
167-
)
169+
df_selected = pl.read_csv(sample_file.contents()) if sample_file.value else None
168170
return (df_selected,)
169171

170172

@@ -230,7 +232,11 @@ def _(df_selected, initial_date_column, initial_yaml, mo):
230232
"Select the date column separately for the time-series view.\n\n"
231233
f"Your dataset columns: `{_cols_hint}`"
232234
),
233-
mo.hstack([yaml_editor, mo.vstack([date_column_picker, date_format_input])], widths=[3, 1], align="start"),
235+
mo.hstack(
236+
[yaml_editor, mo.vstack([date_column_picker, date_format_input])],
237+
widths=[3, 1],
238+
align="start",
239+
),
234240
]
235241
)
236242
return date_column_picker, date_format_input, yaml_editor
@@ -246,15 +252,23 @@ def _(df_selected, load_profile, mo, yaml, yaml_editor):
246252
mo.stop(True, mo.callout(mo.md(f"**Invalid YAML:** {_e}"), kind="danger"))
247253

248254
if not _parsed["profiles"][0]["columns"]:
249-
mo.stop(True, mo.callout(mo.md(f"You need to map **at least one column**"), kind="danger"))
255+
mo.stop(
256+
True,
257+
mo.callout(mo.md("You need to map **at least one column**"), kind="danger"),
258+
)
250259

251260
not_found = []
252261
for declared_column in _parsed["profiles"][0]["columns"]:
253262
if declared_column not in df_selected.columns:
254263
not_found.append(declared_column)
255264

256265
if not_found:
257-
mo.stop(True, mo.callout(mo.md(f"**Columns not found:** {', '.join(not_found)}"), kind="danger"))
266+
mo.stop(
267+
True,
268+
mo.callout(
269+
mo.md(f"**Columns not found:** {', '.join(not_found)}"), kind="danger"
270+
),
271+
)
258272

259273
try:
260274
_profile_name = _parsed["profiles"][0]["name"]
@@ -285,7 +299,6 @@ def _(get_definition_dir):
285299
@app.cell
286300
def _(definition_options, json):
287301
def load_definition(name: str) -> dict:
288-
letter_dir = name[0].lower()
289302
return json.loads(definition_options[name].read_text())
290303

291304
return (load_definition,)
@@ -316,9 +329,7 @@ def _(
316329
profile,
317330
):
318331
mo.stop(
319-
df_selected is None
320-
or df_selected.is_empty()
321-
or not definitions_dropdown.value
332+
df_selected is None or df_selected.is_empty() or not definitions_dropdown.value
322333
)
323334

324335
definitions = definitions_dropdown.value
@@ -327,8 +338,7 @@ def _(
327338
engine = OSDEngine(profile, skip_unresolvable=True)
328339

329340
defs_dict = {
330-
name: json.loads(definition_options[name].read_text())
331-
for name in definitions
341+
name: json.loads(definition_options[name].read_text()) for name in definitions
332342
}
333343
df_filtered = engine.label(df_selected, defs_dict)
334344
return definitions, df_filtered
@@ -340,19 +350,19 @@ def _(definitions, df_filtered, df_selected, mo):
340350

341351
_cards = [
342352
mo.stat(
343-
label="Rows & columns",
353+
label="Syndromic Indicators",
354+
value=len(definitions),
355+
caption=", ".join([definition for definition in definitions]),
356+
bordered=True,
357+
),
358+
mo.stat(
359+
label="Rows",
344360
value=df_selected.shape[0],
345361
),
346362
mo.stat(
347363
label="Columns",
348364
value=df_selected.shape[1],
349365
),
350-
mo.stat(
351-
label="Syndromic Indicators",
352-
value=len(definitions),
353-
caption=", ".join([definition for definition in definitions]),
354-
bordered=True,
355-
),
356366
]
357367

358368
_title = "## Data with Open Syndrome Definitions"
@@ -376,7 +386,7 @@ def _(definitions, load_definition, mo):
376386
mo.md(
377387
"This section shows the definitions used to filter the data. You can use them to understand how the data was filtered and what criteria were applied. 🔎"
378388
),
379-
mo.ui.tabs(
389+
mo.accordion(
380390
{
381391
"JSONs": mo.accordion(
382392
{
@@ -429,21 +439,13 @@ def _(
429439
mo.vstack(
430440
[
431441
mo.md("## Time series"),
432-
plot_cases(df_filtered, definitions, date_column=date_column, date_format=date_format_input.value),
433-
*diagnosis_chart
434-
]
435-
)
436-
return
437-
438-
439-
@app.cell
440-
def _(definitions, df_filtered, df_selected, mo):
441-
mo.stop(df_selected is None or definitions is None or df_filtered is None)
442-
443-
mo.vstack(
444-
[
445-
mo.md("### **Data**"),
446-
mo.ui.dataframe(df_selected),
442+
plot_cases(
443+
df_filtered,
444+
definitions,
445+
date_column=date_column,
446+
date_format=date_format_input.value,
447+
),
448+
*diagnosis_chart,
447449
]
448450
)
449451
return
File renamed without changes.

0 commit comments

Comments
 (0)