|
| 1 | +""" |
| 2 | +Helpers for dealing with data structures specific to the Wright Group Bluesky deployment |
| 3 | +https://github.com/wright-group/bluesky-in-a-box/ |
| 4 | +""" |
| 5 | + |
| 6 | +import re |
| 7 | +import json |
| 8 | +import datetime |
| 9 | +import pathlib |
| 10 | +import logging |
| 11 | +from typing import NamedTuple |
| 12 | + |
| 13 | +from .._open import open as wt5_open |
| 14 | + |
| 15 | + |
# Regular-expression fragments, one per space-separated field of an
# acquisition folder name, in the order they appear in the name.
__folder_parts__ = [
    r"(?P<date>\d\d\d\d-\d\d-\d\d)",  # calendar date, YYYY-MM-DD
    r"(?P<time>\d{5})",  # exactly five digits; parsed below as seconds since date start
    r"(?P<plan>\w*)",
    r"(?P<name>[\s\w\d.-]*)",  # not great...
    r"(?P<uid>\w{8})",  # eight word characters
]

# Pattern text for a complete folder name (kept as a plain string).
__folder_seed__ = " ".join(__folder_parts__)
# Compiled pattern covering only the leading date, time and plan fields.
__datetime_seed__ = re.compile(" ".join(__folder_parts__[:3]))
# Format template with the same field order as the pattern fragments.
__fseed__ = "{date} {time} {plan} {name} {uid}"
| 27 | + |
| 28 | + |
class BlueskyFolder:
    """Container class for a single Bluesky acquisition folder.

    Parameters
    ----------
    folder_path : pathlib.Path
        Directory of the acquisition. Its name is parsed with
        ``parse_folder_name``.

    Attributes
    ----------
    path : pathlib.Path
        The folder passed to the constructor.
    info : FolderInfo or None
        Parsed folder-name fields, or None when the name could not be parsed.
    logger : logging.Logger
        Logger named after the acquisition uid; falls back to the module
        logger when the folder name could not be parsed.
    """

    def __init__(self, folder_path: pathlib.Path):
        self.path = folder_path
        # Set before the early return below so that ``primary`` never fails
        # on a missing attribute, even for folders whose name did not parse.
        self._primary = None
        self.info = parse_folder_name(folder_path.name)
        if self.info is None:
            self.logger = logging.getLogger(__name__)
            return

        self.logger = logging.getLogger(self.info.uid)
        self.logger.info(self.info)

    def _load_doc(self, filename: str) -> dict:
        """Read one JSON document from the ``bluesky_docs`` subfolder."""
        # ``with`` closes the handle deterministically instead of leaving it
        # to the garbage collector.
        with (self.path / "bluesky_docs" / filename).open() as doc:
            return json.load(doc)

    @property
    def primary(self):
        """Object returned by opening ``primary.wt5``; opened once, then cached."""
        if self._primary is None:
            # TODO: open procedure based on plan
            self._primary = wt5_open(self.path / "primary.wt5")
        return self._primary

    @property
    def baseline(self):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError

    @property
    def baseline_tree(self):
        """Text content of ``baseline tree.txt``."""
        return (self.path / "baseline tree.txt").read_text()

    @property
    def primary_tree(self):
        """Text content of ``primary tree.txt``."""
        return (self.path / "primary tree.txt").read_text()

    @property
    def start(self) -> dict:
        """Parsed content of ``bluesky_docs/start.json``."""
        return self._load_doc("start.json")

    @property
    def stop(self) -> dict:
        """Parsed content of ``bluesky_docs/stop.json``."""
        return self._load_doc("stop.json")

    @property
    def primary_descriptor(self) -> dict:
        """Parsed content of ``bluesky_docs/primary descriptor.json``."""
        return self._load_doc("primary descriptor.json")

    @property
    def baseline_descriptor(self) -> dict:
        """Parsed content of ``bluesky_docs/baseline descriptor.json``."""
        return self._load_doc("baseline descriptor.json")
| 81 | + |
| 82 | + |
class FolderInfo(NamedTuple):
    """Parsed fields of a Bluesky acquisition folder name.

    Attributes
    ----------
    date : datetime.date
        Calendar date taken from the folder name.
    time : datetime.time
        Time of day; stored in the folder name as seconds since date start.
    plan : str
        Plan field of the folder name.
    name : str
        Free-form name field of the folder name.
    uid : str
        Eight-character identifier at the end of the folder name.
    """

    date: datetime.date
    time: datetime.time
    plan: str
    name: str
    uid: str

    @property
    def folder(self) -> str:
        """Folder name rebuilt from the parsed fields."""
        seconds = self.time.hour * 3600 + self.time.minute * 60 + self.time.second
        return __fseed__.format(
            date=self.date.isoformat(),
            # The folder-name pattern requires exactly five digits for the
            # time field, so early times of day must keep their leading zeros.
            time=f"{seconds:05d}",
            plan=self.plan,
            name=self.name,
            uid=self.uid,
        )
| 103 | + |
| 104 | + |
def match_identifier(dir: pathlib.Path, **bluesky_identifier) -> list[BlueskyFolder]:
    """Find acquisition folders in a directory whose name fields match.

    Only the immediate children of ``dir`` are inspected.

    Parameters
    ----------
    dir : pathlib.Path
        Directory whose subfolders are checked.
    **bluesky_identifier
        Field values to match. Keys must be ``FolderInfo`` field names; each
        value is compared for equality against the parsed field, so ``date``
        and ``time`` are compared against ``datetime.date`` and
        ``datetime.time`` objects.

    Returns
    -------
    list of BlueskyFolder
        One entry per subfolder whose name parses and matches every criterion.

    Raises
    ------
    AssertionError
        If a keyword is not a ``FolderInfo`` field name.
    """
    for key in bluesky_identifier:
        assert key in FolderInfo._fields, (
            f"unknown identifier {key!r}; expected one of {FolderInfo._fields}"
        )

    keep = []
    for item in dir.iterdir():
        if not (item.is_dir() and re.fullmatch(__folder_seed__, item.name)):
            continue
        info = parse_folder_name(item.name)
        if info is None:
            # name passed the coarse pattern but could not be parsed; skip it
            continue
        if all(getattr(info, key) == value for key, value in bluesky_identifier.items()):
            # Use the directory entry that was actually found: rebuilding the
            # name from the parsed fields is not guaranteed to reproduce it
            # (e.g. runs of whitespace in the name are collapsed on parsing).
            keep.append(BlueskyFolder(item))

    return keep
| 125 | + |
| 126 | + |
def parse_folder_name(folder: str) -> FolderInfo | None:
    """Parse an acquisition folder name into its fields.

    Parameters
    ----------
    folder : str
        Folder name (not a full path).

    Returns
    -------
    FolderInfo or None
        The parsed fields, or None when the name does not follow the expected
        layout or holds an impossible date or time.
    """
    # TODO: match procedure is leaky (mainly due to name and plan), could be cleaned up
    tokens = folder.split()
    if not tokens:
        # empty or whitespace-only name: nothing to parse
        return None

    # the uid is the last whitespace-separated token
    uid_match = re.fullmatch(r"(?P<uid>\w{8})", tokens[-1])
    if uid_match is None:
        return None

    datetime_match = __datetime_seed__.match(folder)
    if datetime_match is None:
        return None

    matchdict = uid_match.groupdict() | datetime_match.groupdict()
    # everything between the first three tokens and the uid is the name
    matchdict["name"] = " ".join(tokens[3:-1])
    try:
        return _to_object(matchdict)
    except ValueError:
        # digits had the right shape but are not a real date / time of day
        return None
| 136 | + |
| 137 | + |
def _to_object(mdict: dict) -> FolderInfo:
    """Build a ``FolderInfo`` from the string fields of a folder name.

    The ``"date"`` and ``"time"`` entries are popped from ``mdict`` and
    converted; the remaining entries are passed through as keyword arguments.
    """
    day = datetime.date.fromisoformat(mdict.pop("date"))
    elapsed = int(mdict.pop("time"))  # total seconds since date start
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    clock = datetime.time(hour=hours, minute=minutes, second=seconds)
    return FolderInfo(date=day, time=clock, **mdict)
0 commit comments