Skip to content

Commit f4d7496

Browse files
SXT bugfixes and improvements (#801)
Various changes needed to make the SXT workflow smoother:
- Use olefile directly and remove the txrm2tiff dependency.
- Read energy, tilt limits and tilt count from the txrm metadata and store them in the DataCollection ispyb table.
- Skip processing of reference txrm files.
- Allow processing of nested folders.
- Set session file to None and then tag the DCG with the unaltered metadata path.
- Set the data collection comment as any part of the file name before `_angle`.

The remaining issue is how to set the rsyncers correctly, which we need to discuss.
1 parent ec0bc49 commit f4d7496

8 files changed

Lines changed: 196 additions & 116 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ smartem = [
7272
"smartem-decisions[backend]",
7373
]
7474
sxt = [
75-
"txrm2tiff",
75+
"numpy<3",
76+
"olefile",
7677
]
7778
[project.urls]
7879
Bug-Tracker = "https://github.com/DiamondLightSource/python-murfey/issues"

src/murfey/client/context.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def ensure_dcg_exists(
7676
token: str,
7777
) -> str | None:
7878
"""Create a data collection group"""
79+
session_file: Path | None = None
7980
if collection_type == "tomo":
8081
experiment_type_id = 36
8182
session_file = metadata_source / "Session.dm"
@@ -97,13 +98,18 @@ def ensure_dcg_exists(
9798
logger.warning(f"Get EPU session hook failed: {e}")
9899
elif collection_type == "sxt":
99100
experiment_type_id = 47
100-
session_file = metadata_source / "Session.dm"
101101
source_visit_dir = metadata_source.parent
102102
else:
103103
logger.error(f"Unknown collection type {collection_type}")
104104
return None
105105

106-
if not session_file.is_file():
106+
if session_file is None:
107+
dcg_tag = "/".join(metadata_source.parts).replace("//", "/")
108+
dcg_data = {
109+
"experiment_type_id": experiment_type_id,
110+
"tag": dcg_tag,
111+
}
112+
elif not session_file.is_file():
107113
logger.warning(f"Cannot find session file {str(session_file)}")
108114
dcg_tag = "/".join(
109115
[part for part in metadata_source.parts if part != environment.visit]

src/murfey/client/contexts/sxt.py

Lines changed: 94 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22
from pathlib import Path
33
from typing import Any
44

5-
from txrm2tiff.inspector import Inspector
6-
from txrm2tiff.txrm import open_txrm
7-
from txrm2tiff.txrm_functions.general import read_stream
8-
from txrm2tiff.xradia_properties.enums import XrmDataTypes
5+
import numpy as np
6+
from olefile import OleFileIO
97

108
from murfey.client.context import (
119
Context,
@@ -46,12 +44,9 @@ def register_sxt_data_collection(
4644
)
4745
return
4846
try:
49-
metadata_source = (
50-
self._basepath.parent / environment.visit / self._basepath.name
51-
)
5247
ensure_dcg_exists(
5348
collection_type="sxt",
54-
metadata_source=metadata_source,
49+
metadata_source=self._basepath,
5550
environment=environment,
5651
machine_config=self._machine_config,
5752
token=self._token,
@@ -66,12 +61,18 @@ def register_sxt_data_collection(
6661
"source": str(self._basepath),
6762
"tag": tilt_series,
6863
"pixel_size_on_image": str(
69-
data_collection_parameters.get("pixel_size", 100)
70-
),
64+
round(data_collection_parameters.get("pixel_size", 100), 2) * 1e-10
65+
), # expected in metres
7166
"image_size_x": data_collection_parameters.get("image_size_x", 0),
7267
"image_size_y": data_collection_parameters.get("image_size_y", 0),
7368
"magnification": data_collection_parameters.get("magnification", 0),
69+
"energy": data_collection_parameters.get("energy"),
7470
"voltage": 0,
71+
"axis_start": data_collection_parameters.get("minimum_angle"),
72+
"axis_end": data_collection_parameters.get("maximum_angle"),
73+
"tilt_series_length": data_collection_parameters.get(
74+
"tilt_series_length"
75+
),
7576
}
7677
capture_post(
7778
base_url=str(environment.url.geturl()),
@@ -127,84 +128,103 @@ def post_transfer(
127128
return False
128129

129130
# Read the tilt angles and pixel size from the txrm
130-
metadata = {
131+
metadata: dict[str, Any] = {
131132
"source": str(self._basepath),
132133
"tilt_series_tag": transferred_file.stem,
133134
}
134-
with open_txrm(
135-
transferred_file, load_images=False, load_reference=False, strict=False
136-
) as txrm:
137-
inspector = Inspector(txrm)
138-
angles = read_stream(
139-
inspector.txrm.ole,
140-
"ImageInfo/Angles",
141-
XrmDataTypes.XRM_FLOAT,
142-
strict=True,
143-
)
144-
if angles:
135+
with OleFileIO(str(transferred_file)) as txrm_ole:
136+
if txrm_ole.exists("ReferenceData/Image"):
137+
metadata["has_reference"] = True
138+
139+
if txrm_ole.exists("ImageInfo/Angles"):
140+
angles = np.frombuffer(
141+
txrm_ole.openstream("ImageInfo/Angles").getvalue(), np.float32
142+
).tolist()
145143
metadata["minimum_angle"] = min(angles)
146144
metadata["maximum_angle"] = max(angles)
147145

148-
pixel_size_txrm = read_stream(
149-
inspector.txrm.ole,
150-
"ImageInfo/PixelSize",
151-
XrmDataTypes.XRM_FLOAT,
152-
strict=True,
153-
)
154-
if pixel_size_txrm:
146+
if txrm_ole.exists("ImageInfo/PixelSize"):
147+
pixel_size_txrm = np.frombuffer(
148+
txrm_ole.openstream("ImageInfo/PixelSize").getvalue(),
149+
np.float32,
150+
).tolist()
155151
metadata["pixel_size"] = pixel_size_txrm[0] * 1e4
156152

157-
image_width_txrm = read_stream(
158-
inspector.txrm.ole,
159-
"ImageInfo/ImageWidth",
160-
XrmDataTypes.XRM_INT,
161-
strict=True,
162-
)
163-
if image_width_txrm:
153+
if txrm_ole.exists("ImageInfo/ImageWidth"):
154+
image_width_txrm = np.frombuffer(
155+
txrm_ole.openstream("ImageInfo/ImageWidth").getvalue(), np.int32
156+
).tolist()
164157
metadata["image_size_x"] = image_width_txrm[0]
165158

166-
image_height_txrm = read_stream(
167-
inspector.txrm.ole,
168-
"ImageInfo/ImageHeight",
169-
XrmDataTypes.XRM_INT,
170-
strict=True,
171-
)
172-
if image_height_txrm:
159+
if txrm_ole.exists("ImageInfo/ImageHeight"):
160+
image_height_txrm = np.frombuffer(
161+
txrm_ole.openstream("ImageInfo/ImageHeight").getvalue(),
162+
np.int32,
163+
).tolist()
173164
metadata["image_size_y"] = image_height_txrm[0]
174165

175-
exposure_time_txrm = read_stream(
176-
inspector.txrm.ole,
177-
"ImageInfo/ExpTimes",
178-
XrmDataTypes.XRM_FLOAT,
179-
strict=True,
180-
)
181-
if exposure_time_txrm:
166+
if txrm_ole.exists("ImageInfo/ExpTimes"):
167+
exposure_time_txrm = np.frombuffer(
168+
txrm_ole.openstream("ImageInfo/ExpTimes").getvalue(), np.float32
169+
).tolist()
182170
metadata["exposure_time"] = exposure_time_txrm[0]
183171

184-
magnification_txrm = read_stream(
185-
inspector.txrm.ole,
186-
"ImageInfo/XrayMagnification",
187-
XrmDataTypes.XRM_FLOAT,
188-
strict=True,
189-
)
190-
if magnification_txrm:
172+
if txrm_ole.exists("ImageInfo/XrayMagnification"):
173+
magnification_txrm = np.frombuffer(
174+
txrm_ole.openstream("ImageInfo/XrayMagnification").getvalue(),
175+
np.float32,
176+
).tolist()
191177
metadata["magnification"] = magnification_txrm[0]
192178

193-
tilt_count_txrm = read_stream(
194-
inspector.txrm.ole,
195-
"ImageInfo/ImagesTaken",
196-
XrmDataTypes.XRM_INT,
197-
strict=True,
198-
)
199-
if tilt_count_txrm:
200-
metadata["tilt_count"] = tilt_count_txrm[0]
179+
if txrm_ole.exists("ImageInfo/ImagesTaken"):
180+
tilt_count_txrm = np.frombuffer(
181+
txrm_ole.openstream("ImageInfo/ImagesTaken").getvalue(),
182+
np.int32,
183+
).tolist()
184+
metadata["tilt_series_length"] = tilt_count_txrm[0]
185+
186+
if txrm_ole.exists("PositionInfo/AxisNames") and txrm_ole.exists(
187+
"PositionInfo/MotorPositions"
188+
):
189+
# The ImageInfo/Energy field is empty
190+
# Instead it needs extracting from the PositionInfo list
191+
axis_names = [
192+
i
193+
for i in txrm_ole.openstream("PositionInfo/AxisNames")
194+
.read()
195+
.decode("ascii")
196+
.split("\x00")
197+
if i
198+
]
199+
axis_values = np.frombuffer(
200+
txrm_ole.openstream("PositionInfo/MotorPositions").getvalue(),
201+
np.float32,
202+
)
203+
if "Energy" in axis_names:
204+
energy_index = list(np.array(axis_names) == "Energy").index(
205+
True
206+
)
207+
metadata["energy"] = int(round(axis_values[energy_index]))
208+
209+
if not metadata.get("has_reference", False):
210+
logger.debug(f"Reference image {transferred_file} not processed")
211+
return True
201212

213+
visit_index = transferred_file.parent.parts.index(environment.visit)
214+
destination_search_dir = "/".join(
215+
transferred_file.parts[: visit_index + 2]
216+
).replace("//", "/")
202217
self.register_sxt_data_collection(
203218
tilt_series=transferred_file.stem,
204219
data_collection_parameters=metadata,
205220
file_extension=transferred_file.suffix,
206-
image_directory=environment.default_destinations.get(
207-
transferred_file.parent, transferred_file.parent
221+
image_directory=str(
222+
Path(
223+
environment.default_destinations.get(
224+
Path(destination_search_dir), destination_search_dir
225+
)
226+
)
227+
/ transferred_file.parent.relative_to(destination_search_dir)
208228
),
209229
environment=environment,
210230
)
@@ -227,11 +247,15 @@ def post_transfer(
227247
visit_name=environment.visit,
228248
session_id=environment.murfey_session,
229249
data={
230-
"session_id": environment.murfey_session,
231250
"tag": transferred_file.stem,
232-
"source": str(transferred_file.parent),
233-
"pixel_size": metadata.get("pixel_size", 100),
251+
"source": destination_search_dir,
252+
"pixel_size": round(
253+
metadata.get("pixel_size", 100), 2
254+
), # angstroms
234255
"tilt_offset": midpoint(angles),
256+
"tilt_series_length": metadata.get(
257+
"tilt_series_length", len(angles)
258+
),
235259
"txrm": str(file_transferred_to),
236260
},
237261
)

src/murfey/server/api/workflow.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,15 @@ class DCParameters(BaseModel):
290290
tag: str
291291
source: str
292292
magnification: float
293-
total_exposed_dose: Optional[float] = None
294-
c2aperture: Optional[float] = None
295-
exposure_time: Optional[float] = None
296-
slit_width: Optional[float] = None
293+
total_exposed_dose: float | None = None
294+
c2aperture: float | None = None
295+
exposure_time: float | None = None
296+
slit_width: float | None = None
297297
phase_plate: bool = False
298+
energy: float | None = None
299+
axis_start: float | None = None
300+
axis_end: float | None = None
301+
tilt_series_length: int | None = None
298302
data_collection_tag: str = ""
299303

300304

@@ -321,6 +325,7 @@ def start_dc(
321325
"image_directory": str(rsync_basepath / dc_params.image_directory),
322326
"start_time": str(datetime.now()),
323327
"voltage": dc_params.voltage,
328+
"energy": dc_params.energy,
324329
"pixel_size": str(float(dc_params.pixel_size_on_image) * 1e9),
325330
"image_suffix": dc_params.file_extension,
326331
"experiment_type": dc_params.experiment_type,
@@ -335,6 +340,9 @@ def start_dc(
335340
"exposure_time": dc_params.exposure_time,
336341
"slit_width": dc_params.slit_width,
337342
"phase_plate": dc_params.phase_plate,
343+
"axis_start": dc_params.axis_start,
344+
"axis_end": dc_params.axis_end,
345+
"tilt_series_length": dc_params.tilt_series_length,
338346
"session_id": session_id,
339347
}
340348

src/murfey/workflows/register_data_collection.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def run(message: dict, murfey_db: SQLModelSession) -> dict[str, bool]:
6565
imageDirectory=message["image_directory"],
6666
imageSuffix=message["image_suffix"],
6767
voltage=message["voltage"],
68+
wavelength=message["energy"],
6869
dataCollectionGroupId=dcgid,
6970
pixelSizeOnImage=message["pixel_size"],
7071
imageSizeX=message["image_size_x"],
@@ -75,13 +76,20 @@ def run(message: dict, murfey_db: SQLModelSession) -> dict[str, bool]:
7576
totalExposedDose=message.get("total_exposed_dose"),
7677
c2aperture=message.get("c2aperture"),
7778
phasePlate=int(message.get("phase_plate", 0)),
79+
axisStart=message.get("axis_start"),
80+
axisEnd=message.get("axis_end"),
81+
numberOfImages=message.get("tilt_series_length"),
7882
)
7983
dcid = _transport_object.do_insert_data_collection(
8084
record,
8185
tag=(
8286
message.get("tag")
8387
if message["experiment_type"] == "tomography"
84-
else ""
88+
else (
89+
message.get("tag", "").split("_angle")[0]
90+
if message["experiment_type"] == "sxt"
91+
else ""
92+
)
8593
),
8694
).get("return_value", None)
8795
if dcid is None:

src/murfey/workflows/sxt/process_sxt_tilt_series.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323

2424
class SXTTiltSeriesInfo(BaseModel):
25-
session_id: int
2625
tag: str
2726
source: str
2827
txrm: str
@@ -39,19 +38,22 @@ def process_sxt_tilt_series_workflow(
3938
):
4039
tilt_series_query = murfey_db.exec(
4140
select(TiltSeries)
42-
.where(TiltSeries.session_id == tilt_series_info.session_id)
41+
.where(TiltSeries.session_id == session_id)
4342
.where(TiltSeries.tag == tilt_series_info.tag)
4443
.where(TiltSeries.rsync_source == tilt_series_info.source)
4544
).all()
4645
if tilt_series_query:
4746
tilt_series = tilt_series_query[0]
47+
if tilt_series.processing_requested:
48+
logger.info(f"Tilt series {tilt_series.tag} has already been processed")
49+
return
4850
else:
4951
tilt_series = TiltSeries(
5052
session_id=session_id,
5153
tag=tilt_series_info.tag,
5254
rsync_source=tilt_series_info.source,
5355
tilt_series_length=tilt_series_info.tilt_series_length,
54-
processing_requested=True,
56+
processing_requested=False,
5557
)
5658
murfey_db.add(tilt_series)
5759
murfey_db.commit()
@@ -114,3 +116,6 @@ def process_sxt_tilt_series_workflow(
114116
logger.info(
115117
f"No transport object found. Zocalo message would be {sanitise(str(zocalo_message))}"
116118
)
119+
tilt_series.processing_requested = True
120+
murfey_db.add(tilt_series)
121+
murfey_db.commit()

0 commit comments

Comments
 (0)