Skip to content

Commit 0453603

Browse files
authored
Optimised database registration logic in CLEM workflow (#787)
* Use the latest registered atlas for a particular data collection group as the reference when updating grid squares * Modified '_register_imaging_site' so that it returns the ImagingSite entry; modified the subsequent helper functions to use the returned ImagingSite entry instead of performing new queries * Removed excess cases of 'db.close()' and 'db.commit()'
1 parent 84e0b4a commit 0453603

2 files changed

Lines changed: 73 additions & 63 deletions

File tree

src/murfey/workflows/clem/register_preprocessing_results.py

Lines changed: 57 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,20 @@ def _register_clem_imaging_site(
118118
result: CLEMPreprocessingResult,
119119
murfey_db: Session,
120120
):
121-
def _register(
121+
"""
122+
Creates an ImagingSite database entry for the current CLEM preprocessing result
123+
if one doesn't already exist, or modifies the existing one if it does. Each entry
124+
corresponds to a unique site on the sample grid, and results containing denoised
125+
data will supersede existing rows for the same position that contain only raw
126+
data. Returns the created/queried entry.
127+
"""
128+
129+
def _populate(
122130
entry: MurfeyDB.ImagingSite,
123131
result: CLEMPreprocessingResult,
124132
):
125133
"""
126-
Helper function to update the ImagingSite column values with.
134+
Helper function to populate the ImagingSite column values.
127135
"""
128136

129137
# Is this an atlas or grid square
@@ -174,20 +182,20 @@ def _register(
174182
session_id=session_id,
175183
site_name=result.site_name,
176184
)
177-
clem_img_site = _register(clem_img_site, result)
185+
clem_img_site = _populate(clem_img_site, result)
178186

179187
# Prepare to overwrite existing entry if current result is a denoised dataset
180188
if result.is_denoised:
181189
# Proceed with overwrite if current result is different from existing entry
182190
output_file = list(result.output_files.values())[0]
183191
if str(output_file.parent / "*.tiff") != clem_img_site.image_path:
184-
clem_img_site = _register(clem_img_site, result)
192+
clem_img_site = _populate(clem_img_site, result)
185193

194+
# Commit changes and return entry
186195
murfey_db.add(clem_img_site)
187196
murfey_db.commit()
188-
murfey_db.close()
189-
190197
logger.info(f"CLEM preprocessing results registered for {result.series_name!r} ")
198+
return clem_img_site
191199

192200

193201
def _determine_collection_mode(
@@ -219,9 +227,15 @@ def _register_dcg_and_atlas(
219227
session_id: int,
220228
instrument_name: str,
221229
visit_name: str,
222-
result: CLEMPreprocessingResult,
230+
imaging_site: MurfeyDB.ImagingSite,
223231
murfey_db: Session,
224232
):
233+
"""
234+
Takes an ImagingSite entry and uses it to create and register DataCollectionGroup
235+
entries in ISPyB if they don't already exist, or to populate existing entries.
236+
After doing so, it will register the DataCollectionGroup ID in Murfey and add it
237+
to the ImagingSite entry.
238+
"""
225239
# Determine variables to register data collection group and atlas with
226240
proposal_code = "".join(char for char in visit_name.split("-")[0] if char.isalpha())
227241
proposal_number = "".join(
@@ -230,36 +244,29 @@ def _register_dcg_and_atlas(
230244
visit_number = visit_name.split("-")[-1]
231245

232246
# Generate name/tag for data collection group based on series name
233-
dcg_name = result.site_name.split("--")[0]
234-
if result.site_name.split("--")[1].isdigit():
235-
dcg_name += f"--{result.site_name.split('--')[1]}"
247+
dcg_name = imaging_site.site_name.split("--")[0]
248+
if imaging_site.site_name.split("--")[1].isdigit():
249+
dcg_name += f"--{imaging_site.site_name.split('--')[1]}"
236250

237251
# Determine values for atlas
238-
if result.is_atlas:
239-
output_file = list(result.output_files.values())[0]
240-
# Register the thumbnail entries if they are provided
241-
if result.thumbnails and result.thumbnail_size is not None:
242-
# Glob path to the thumbnail files
243-
thumbnail = list(result.thumbnails.values())[0]
244-
atlas_name = str(thumbnail.parent / "*.png")
245-
246-
# Work out the scaling factor used
247-
thumbnail_height, thumbnail_width = result.thumbnail_size
248-
scaling_factor = min(
249-
thumbnail_width / result.pixels_x,
250-
thumbnail_height / result.pixels_y,
251-
)
252-
atlas_pixel_size = result.pixel_size / scaling_factor
252+
if is_atlas := imaging_site.data_type == "atlas":
253+
# Register using thumbnail values if they are provided
254+
if (
255+
imaging_site.thumbnail_path is not None
256+
and imaging_site.thumbnail_pixel_size is not None
257+
):
258+
atlas_name: str | None = imaging_site.thumbnail_path
259+
atlas_pixel_size: float | None = imaging_site.thumbnail_pixel_size
253260
# Otherwise, register the TIFF files themselves
254261
else:
255-
atlas_name = str(output_file.parent / "*.tiff")
256-
atlas_pixel_size = result.pixel_size
262+
atlas_name = imaging_site.image_path
263+
atlas_pixel_size = imaging_site.image_pixel_size
257264
# Translate colour flags into ISPyB convention
258265
color_flags = {
259-
COLOR_FLAGS_MURFEY_TO_ISPYB[key]: int(value)
260-
for key, value in _get_color_flags(result.output_files.keys()).items()
266+
COLOR_FLAGS_MURFEY_TO_ISPYB[key]: getattr(imaging_site, key, 0)
267+
for key in COLOR_FLAGS_MURFEY_TO_ISPYB.keys()
261268
}
262-
collection_mode = _determine_collection_mode(result.output_files.keys())
269+
collection_mode = imaging_site.collection_mode
263270
else:
264271
atlas_name = ""
265272
atlas_pixel_size = 0.0
@@ -272,9 +279,8 @@ def _register_dcg_and_atlas(
272279
.where(MurfeyDB.DataCollectionGroup.tag == dcg_name)
273280
).all():
274281
dcg_entry = dcg_search[0]
275-
# Update atlas if registering atlas dataset
276-
# and data collection group already exists
277-
if result.is_atlas:
282+
# Update if current dataset is atlas and data collection group exists
283+
if is_atlas:
278284
atlas_message = {
279285
"session_id": session_id,
280286
"dcgid": dcg_entry.id,
@@ -330,51 +336,41 @@ def _register_dcg_and_atlas(
330336
.where(MurfeyDB.DataCollectionGroup.tag == dcg_name)
331337
).one()
332338

333-
if not (
334-
clem_img_site := murfey_db.exec(
335-
select(MurfeyDB.ImagingSite)
336-
.where(MurfeyDB.ImagingSite.session_id == session_id)
337-
.where(MurfeyDB.ImagingSite.site_name == result.site_name)
338-
).one_or_none()
339-
):
340-
clem_img_site = MurfeyDB.ImagingSite(
341-
session_id=session_id, site_name=result.site_name
342-
)
343-
344-
clem_img_site.dcg_id = dcg_entry.id
345-
clem_img_site.dcg_name = dcg_entry.tag
346-
murfey_db.add(clem_img_site)
339+
imaging_site.dcg_id = dcg_entry.id
340+
imaging_site.dcg_name = dcg_entry.tag
341+
murfey_db.add(imaging_site)
347342
murfey_db.commit()
348-
murfey_db.close()
349343

350344

351345
def _register_grid_square(
352346
session_id: int,
353-
result: CLEMPreprocessingResult,
347+
imaging_site: MurfeyDB.ImagingSite,
354348
murfey_db: Session,
355349
):
356350
# Skip this step if no transport manager object is configured
357351
if _transport_object is None:
358352
logger.error("Unable to find transport manager")
359353
return
360-
# Load all entries for the current data collection group
361-
dcg_name = result.site_name.split("--")[0]
362-
if result.site_name.split("--")[1].isdigit():
363-
dcg_name += f"--{result.site_name.split('--')[1]}"
354+
if (dcg_name := imaging_site.dcg_name) is None:
355+
logger.warning("Current imaging site has no data collection group name")
356+
return
364357

365358
# Check if an atlas has been registered
366359
if not (
367-
atlas_entry := murfey_db.exec(
360+
# Sort by ascending insertion order
361+
atlas_results := murfey_db.exec(
368362
select(MurfeyDB.ImagingSite)
369363
.where(MurfeyDB.ImagingSite.session_id == session_id)
370364
.where(MurfeyDB.ImagingSite.dcg_name == dcg_name)
371365
.where(MurfeyDB.ImagingSite.data_type == "atlas")
372-
).one_or_none()
366+
.order_by(MurfeyDB.ImagingSite.id)
367+
).all()
373368
):
374369
logger.info(
375370
f"No atlas has been registered for data collection group {dcg_name!r} yet"
376371
)
377372
return
373+
atlas_entry = atlas_results[-1] # Use the latest registered atlas
378374

379375
# Check if there are CLEM entries to register
380376
if clem_img_site_to_register := murfey_db.exec(
@@ -531,17 +527,17 @@ def _register_grid_square(
531527
image=grid_square_params.image,
532528
)
533529
murfey_db.add(grid_square_entry)
534-
murfey_db.commit()
535530

536531
# Add grid square ID to existing CLEM image series entry
537532
clem_img_site.grid_square_id = grid_square_entry.id
538533
murfey_db.add(clem_img_site)
539-
murfey_db.commit()
534+
535+
# Do one commit at the end
536+
murfey_db.commit()
540537
else:
541538
logger.info(
542539
f"No grid squares to register for data collection group {dcg_name!r} yet"
543540
)
544-
murfey_db.close()
545541
return
546542

547543

@@ -584,7 +580,7 @@ def run(message: dict, murfey_db: Session) -> dict[str, bool]:
584580
return {"success": False, "requeue": False}
585581
try:
586582
# Register items in Murfey database
587-
_register_clem_imaging_site(
583+
clem_img_site = _register_clem_imaging_site(
588584
session_id=session_id,
589585
result=result,
590586
murfey_db=murfey_db,
@@ -602,7 +598,7 @@ def run(message: dict, murfey_db: Session) -> dict[str, bool]:
602598
session_id=session_id,
603599
instrument_name=murfey_session.instrument_name,
604600
visit_name=murfey_session.visit,
605-
result=result,
601+
imaging_site=clem_img_site,
606602
murfey_db=murfey_db,
607603
)
608604
except Exception:
@@ -617,7 +613,7 @@ def run(message: dict, murfey_db: Session) -> dict[str, bool]:
617613
# Register CLEM image series as grid squares
618614
_register_grid_square(
619615
session_id=session_id,
620-
result=result,
616+
imaging_site=clem_img_site,
621617
murfey_db=murfey_db,
622618
)
623619
except Exception:

tests/workflows/clem/test_register_preprocessing_results.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,20 @@ def generate_preprocessing_messages(
6969
for n in range(3)
7070
]
7171
)
72+
# Add a second atlas dataset
73+
datasets.extend(
74+
[
75+
(
76+
grid_dir / "Overview 2" / "Image 1",
77+
False,
78+
True,
79+
(2400, 2400),
80+
1e-6,
81+
[0.002, 0.0044, 0.002, 0.0044],
82+
)
83+
]
84+
)
85+
# Add on metadata for denoised datasets
7286
datasets.extend(
7387
[
7488
(
@@ -398,7 +412,7 @@ def test_run_with_db(
398412
MurfeyDB.GridSquare.session_id == murfey_session.id
399413
)
400414
).all()
401-
assert len(murfey_gs_search) == (len(preprocessing_messages) - 1) // 2
415+
assert len(murfey_gs_search) == (len(preprocessing_messages) - 2) // 2
402416

403417
# ISPyB's DataCollectionGroup should have an entry
404418
murfey_dcg = murfey_dcg_search[0]
@@ -449,7 +463,7 @@ def test_run_with_db(
449463
.scalars()
450464
.all()
451465
)
452-
assert len(ispyb_gs_search) == (len(preprocessing_messages) - 1) // 2
466+
assert len(ispyb_gs_search) == (len(preprocessing_messages) - 2) // 2
453467
for gs in ispyb_gs_search:
454468
# Check that all entries point to the denoised images ("_Lng_LVCC")
455469
assert gs.gridSquareImage is not None and "_Lng_LVCC" in gs.gridSquareImage

0 commit comments

Comments
 (0)