Skip to content

Commit 8b64cad

Browse files
committed
Add lab-action spreadsheet upload flows
1 parent 62d060c commit 8b64cad

11 files changed

Lines changed: 1615 additions & 17 deletions

File tree

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ For normal user/admin behavior, the CLI, API, and GUI are alternate surfaces ove
8080

8181
Bloom exposes a FastAPI/Jinja GUI for internal operators. Current surfaces include dashboard/home, object search and details, container/content/equipment operations, graph views, auth/profile flows, the `/lab-actions` wet-lab action wizard, and the mounted TapDB GUI at `/tapdb` when configured by Dayhoff.
8282

83-
`/lab-actions` is the temporary operator flow for mapping incoming biospecimen tubes to extraction plates, sequencing-library plates, pool tubes, and sequencing run sets. It is backed by `/api/v1/lab-actions/*`; GUI actions should not have behavior that is unavailable through the API.
83+
`/lab-actions` is the temporary operator flow for mapping incoming biospecimen tubes to extraction plates, extraction QC plates, sequencing-library plates, pool tubes, generic lab/run sets, per-well associated data, and sequencing run sets. It also accepts `.csv` and `.xlsx` spreadsheet uploads for the same API-backed actions. GUI actions should not have behavior that is unavailable through the API.
8484

8585
See [`docs/lab_actions.md`](docs/lab_actions.md) for the template matrix, lineage contract, API examples, GUI flow, and production rollout checklist for this surface.
8686

@@ -93,9 +93,15 @@ The primary API is under `/api/v1/*`. Current route families include objects, co
9393
The lab-action route family includes:
9494

9595
- `POST /api/v1/lab-actions/extraction-plates`
96+
- `POST /api/v1/lab-actions/extraction-qc-plates`
9697
- `POST /api/v1/lab-actions/seq-library-plates`
9798
- `POST /api/v1/lab-actions/seq-library-pools`
9899
- `POST /api/v1/lab-actions/seq-runs`
100+
- `POST /api/v1/lab-actions/sets`
101+
- `GET /api/v1/lab-actions/sets/{set_euid}`
102+
- `POST /api/v1/lab-actions/sets/{set_euid}/members`
103+
- `POST /api/v1/lab-actions/plate-well-data`
104+
- `POST /api/v1/lab-actions/spreadsheet-import`
99105
- `GET /api/v1/lab-actions/seq-runs/{set_euid}/samplesheet`
100106
- `GET /api/v1/lab-actions/plates/{plate_euid}/mapping.csv`
101107
- `POST /api/v1/lab-actions/print-euids`
@@ -119,7 +125,7 @@ Deployed browser evidence should target `https://bloom.<deploy>.dev.lsmc.bio` an
119125

120126
- [`docs/apis.md`](docs/apis.md): API details.
121127
- [`docs/gui.md`](docs/gui.md): GUI routes and screenshots when current.
122-
- [`docs/lab_actions.md`](docs/lab_actions.md): extraction/library/pooling/sequencing-run action flow, template matrix, and production rollout checklist.
128+
- [`docs/lab_actions.md`](docs/lab_actions.md): extraction/QC/library/pooling/set/sequencing-run action flow, spreadsheet upload schemas, template matrix, and production rollout checklist.
123129
- [`docs/architecture.md`](docs/architecture.md): domain model and runtime boundaries.
124130
- [`docs/becoming_a_discoverable_service.md`](docs/becoming_a_discoverable_service.md): Dayhoff/Kahlo observability contract.
125131
- [`docs/plans/`](docs/plans/): active ledgers.

bloom_lims/api/v1/lab_actions.py

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44

55
import logging
66

7-
from fastapi import APIRouter, Depends, HTTPException
7+
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
88
from fastapi.responses import PlainTextResponse
99

1010
from bloom_lims.domain.lab_actions import LabActionsService
1111
from bloom_lims.schemas.lab_actions import (
1212
ExtractionPlateRequest,
13+
ExtractionQcPlateRequest,
14+
LabSetMembersRequest,
15+
LabSetRequest,
16+
PlateWellDataRequest,
1317
PrintEuidRequest,
1418
SeqLibraryPlateRequest,
1519
SeqLibraryPoolRequest,
@@ -53,6 +57,34 @@ async def create_extraction_plate(
5357
service.close()
5458

5559

60+
@router.post("/extraction-qc-plates")
async def fill_extraction_qc_plate(
    payload: ExtractionQcPlateRequest,
    user: APIUser = Depends(require_write),
):
    """Hand an extraction-QC-plate request to the lab-actions service.

    A service is created for the calling user and is always closed before the
    handler exits. Any exception raised while serving the request is passed to
    ``_raise_http``.
    """
    service = _service_for_user(user)
    try:
        result = service.fill_extraction_qc_plate(payload)
    except Exception as exc:
        _raise_http(exc)
    else:
        return result
    finally:
        service.close()
72+
73+
74+
@router.post("/plate-well-data")
async def attach_plate_well_data(
    payload: PlateWellDataRequest,
    user: APIUser = Depends(require_write),
):
    """Hand a plate-well-data request to the lab-actions service.

    A service is created for the calling user and is always closed before the
    handler exits. Any exception raised while serving the request is passed to
    ``_raise_http``.
    """
    service = _service_for_user(user)
    try:
        result = service.attach_plate_well_data(payload)
    except Exception as exc:
        _raise_http(exc)
    else:
        return result
    finally:
        service.close()
86+
87+
5688
@router.post("/seq-library-plates")
5789
async def create_seq_library_plate(
5890
payload: SeqLibraryPlateRequest,
@@ -95,6 +127,49 @@ async def create_seq_run_set(
95127
service.close()
96128

97129

130+
@router.post("/sets")
async def create_lab_set(
    payload: LabSetRequest,
    user: APIUser = Depends(require_write),
):
    """Hand a lab-set creation request to the lab-actions service.

    A service is created for the calling user and is always closed before the
    handler exits. Any exception raised while serving the request is passed to
    ``_raise_http``.
    """
    service = _service_for_user(user)
    try:
        result = service.create_lab_set(payload)
    except Exception as exc:
        _raise_http(exc)
    else:
        return result
    finally:
        service.close()
142+
143+
144+
@router.get("/sets/{set_euid}")
async def get_lab_set(
    set_euid: str,
    user: APIUser = Depends(require_read),
):
    """Look up the lab set identified by ``set_euid`` via the lab-actions service.

    A service is created for the calling user and is always closed before the
    handler exits. Any exception raised while serving the request is passed to
    ``_raise_http``.
    """
    service = _service_for_user(user)
    try:
        result = service.get_lab_set(set_euid)
    except Exception as exc:
        _raise_http(exc)
    else:
        return result
    finally:
        service.close()
156+
157+
158+
@router.post("/sets/{set_euid}/members")
async def add_lab_set_members(
    set_euid: str,
    payload: LabSetMembersRequest,
    user: APIUser = Depends(require_write),
):
    """Hand an add-members request for the set ``set_euid`` to the service.

    A service is created for the calling user and is always closed before the
    handler exits. Any exception raised while serving the request is passed to
    ``_raise_http``.
    """
    service = _service_for_user(user)
    try:
        result = service.add_lab_set_members(set_euid, payload)
    except Exception as exc:
        _raise_http(exc)
    else:
        return result
    finally:
        service.close()
171+
172+
98173
@router.get("/seq-runs/{set_euid}/samplesheet")
99174
async def download_seq_run_sample_sheet(
100175
set_euid: str,
@@ -137,6 +212,26 @@ async def download_plate_mapping_csv(
137212
service.close()
138213

139214

215+
@router.post("/spreadsheet-import")
async def import_lab_action_spreadsheet(
    file: UploadFile = File(...),
    dry_run: bool = Query(True),
    user: APIUser = Depends(require_write),
):
    """Read an uploaded spreadsheet and pass it to the lab-actions service.

    The whole upload is read into memory and forwarded together with its
    filename (``"upload.xlsx"`` when the client sent none) and the ``dry_run``
    query flag, which defaults to ``True``. The service is always closed before
    the handler exits; any exception, including one raised while reading the
    upload, is passed to ``_raise_http``.
    """
    service = _service_for_user(user)
    try:
        contents = await file.read()
        upload_name = file.filename or "upload.xlsx"
        result = service.import_spreadsheet(
            filename=upload_name,
            data=contents,
            dry_run=dry_run,
        )
    except Exception as exc:
        _raise_http(exc)
    else:
        return result
    finally:
        service.close()
233+
234+
140235
@router.post("/print-euids")
141236
async def print_euids(
142237
payload: PrintEuidRequest,
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
"""Spreadsheet parsing helpers for Bloom lab-action uploads."""
2+
3+
from __future__ import annotations
4+
5+
import csv
6+
import io
7+
import re
8+
import zipfile
9+
from dataclasses import dataclass
10+
from pathlib import PurePosixPath
11+
from typing import Any
12+
from xml.etree import ElementTree
13+
14+
15+
@dataclass(frozen=True)
class ParsedSheet:
    """One parsed worksheet (or CSV file): its column keys and data rows."""

    # Sheet name from the workbook; CSV uploads are parsed under the name "CSV".
    name: str
    # Column keys in sheet order; empty when no header row was detected.
    headers: list[str]
    # One dict per data row, keyed by the entries of ``headers``.
    rows: list[dict[str, Any]]
20+
21+
22+
# XML namespace URIs for the XLSX parts read by this module. The keys are the
# prefixes used in the ElementTree ``find``/``findall`` paths below.
_NS = {
    "main": "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    "rel": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "pkgrel": "http://schemas.openxmlformats.org/package/2006/relationships",
}
27+
28+
29+
def normalize_header(value: Any) -> str:
    """Turn an operator-typed column header into a stable snake_case key.

    The value is stringified (falsy values become ""), trimmed and lowercased.
    A fixed list of known misspellings is corrected, "destination" is mapped
    to "target", and the "(r)" / "(data)" markers are removed. Every run of
    characters outside ``a-z0-9`` then becomes a single underscore, and
    leading/trailing underscores are stripped.
    """
    cleaned = str(value or "").strip().lower()
    # Applied in this order, as plain substring replacements.
    substitutions = (
        ("continer", "container"),
        ("conteiner", "container"),
        ("templae", "template"),
        ("destination", "target"),
        ("(r)", ""),
        ("(data)", ""),
    )
    for wrong, right in substitutions:
        cleaned = cleaned.replace(wrong, right)
    cleaned = cleaned.replace("/", " ")
    cleaned = re.sub(r"[^a-z0-9]+", "_", cleaned)
    cleaned = re.sub(r"_+", "_", cleaned)
    return cleaned.strip("_")
46+
47+
48+
def _cell_column_index(cell_ref: str) -> int:
49+
match = re.match(r"([A-Z]+)", cell_ref.upper())
50+
if not match:
51+
return 0
52+
value = 0
53+
for char in match.group(1):
54+
value = value * 26 + (ord(char) - ord("A") + 1)
55+
return value - 1
56+
57+
58+
def _xml_text(element: ElementTree.Element | None) -> str:
59+
if element is None:
60+
return ""
61+
return "".join(element.itertext())
62+
63+
64+
def _shared_strings(archive: zipfile.ZipFile) -> list[str]:
    """Load the workbook's shared-string table as plain text.

    Args:
        archive: Open XLSX archive.

    Returns:
        The text of each ``<si>`` entry of ``xl/sharedStrings.xml`` in file
        order, so a shared-string cell's index can be used directly as a list
        index. Returns an empty list when the archive has no such member.
    """
    try:
        root = ElementTree.fromstring(archive.read("xl/sharedStrings.xml"))
    except KeyError:
        # ZipFile.read raises KeyError for a missing member.
        return []
    strings: list[str] = []
    for item in root.findall("main:si", _NS):
        # The visible value is the direct <t> child (plain strings) or the
        # <t> of each rich-text run <r>. Joining *all* descendant text would
        # also pull in phonetic runs (<rPh>) and any whitespace between
        # elements, neither of which belongs to the cell value.
        text_nodes = item.findall("main:t", _NS) + item.findall(
            "main:r/main:t", _NS
        )
        strings.append("".join(node.text or "" for node in text_nodes))
    return strings
73+
74+
75+
def _sheet_paths(archive: zipfile.ZipFile) -> list[tuple[str, str]]:
    """List ``(sheet name, archive member path)`` pairs in workbook order.

    Sheet entries from ``xl/workbook.xml`` are resolved to member paths through
    the relationships in ``xl/_rels/workbook.xml.rels``. Entries whose
    relationship id has no (non-empty) target are skipped, and a sheet without
    a ``name`` attribute is reported as "Sheet".
    """
    workbook_root = ElementTree.fromstring(archive.read("xl/workbook.xml"))
    rels_root = ElementTree.fromstring(archive.read("xl/_rels/workbook.xml.rels"))

    targets: dict[str, str] = {}
    for relationship in rels_root.findall("pkgrel:Relationship", _NS):
        relationship_id = relationship.attrib["Id"]
        targets[relationship_id] = relationship.attrib["Target"]

    id_attribute = f"{{{_NS['rel']}}}id"
    resolved: list[tuple[str, str]] = []
    for sheet in workbook_root.findall("main:sheets/main:sheet", _NS):
        target = targets.get(sheet.attrib.get(id_attribute, ""))
        if not target:
            continue
        member = PurePosixPath(target.lstrip("/"))
        # Targets are taken as relative to xl/ unless they already start there.
        if not target.lstrip("/").startswith("xl/"):
            member = PurePosixPath("xl") / member
        resolved.append((sheet.attrib.get("name", "Sheet"), str(member)))
    return resolved
96+
97+
98+
def _parse_xlsx_sheet(
    archive: zipfile.ZipFile, path: str, shared_strings: list[str]
) -> list[list[Any]]:
    """Read one worksheet part into a list of rows of cell values.

    Each row is a list positioned by column: a cell referenced as ``C5`` lands
    at index 2, and columns with no cell are filled with ``None``. Shared-string
    cells (``t="s"``) are resolved through ``shared_strings``, inline strings
    are read from ``<is>``, and every other cell yields the text of ``<v>``
    (``None`` when empty), so numeric cells come back as strings.

    Args:
        archive: Open XLSX archive.
        path: Member path of the worksheet XML inside ``archive``.
        shared_strings: The workbook's shared-string table.

    Returns:
        One list per ``<row>`` element, in document order.

    Raises:
        KeyError: If ``path`` is not a member of ``archive``.
        ValueError: If a cell points past the end of ``shared_strings``.
    """
    root = ElementTree.fromstring(archive.read(path))
    rows: list[list[Any]] = []
    for row_el in root.findall(".//main:sheetData/main:row", _NS):
        values: list[Any] = []
        next_col = 0
        for cell in row_el.findall("main:c", _NS):
            cell_ref = cell.attrib.get("r")
            # The "r" attribute is optional. A cell without it follows the
            # previous cell instead of overwriting column A.
            col_index = _cell_column_index(cell_ref) if cell_ref else next_col
            next_col = col_index + 1
            if len(values) <= col_index:
                values.extend([None] * (col_index + 1 - len(values)))
            cell_type = cell.attrib.get("t")
            value_el = cell.find("main:v", _NS)
            if cell_type == "s":
                raw = _xml_text(value_el)
                # isdecimal() matches exactly the digit strings int() accepts.
                if raw and raw.isdecimal():
                    string_index = int(raw)
                    if string_index >= len(shared_strings):
                        raise ValueError(
                            f"Cell {cell_ref or '?'} in {path} references shared "
                            f"string {string_index}, but the workbook defines "
                            f"only {len(shared_strings)}"
                        )
                    values[col_index] = shared_strings[string_index]
                else:
                    # Non-numeric index: keep the raw text as the value.
                    values[col_index] = raw
            elif cell_type == "inlineStr":
                values[col_index] = _xml_text(cell.find("main:is", _NS))
            else:
                raw = _xml_text(value_el)
                values[col_index] = raw if raw != "" else None
        rows.append(values)
    return rows
123+
124+
125+
def _find_header_row(rows: list[list[Any]]) -> int | None:
    """Pick the most header-like row among the first 20 rows.

    A row scores one point per distinct non-empty normalized cell, plus 10 when
    any of those cells is one of the known identifier columns. The earliest row
    with the highest score wins.

    Returns:
        The index of the winning row, or ``None`` when no row scores at least 2.
    """
    known_columns = {
        "tube_euid",
        "container_euid",
        "source_well_euid",
        "pool_tube_euid",
        "annotation_template_euid",
    }
    winner: int | None = None
    top_score = 0
    for position, row in enumerate(rows[:20]):
        distinct = {normalize_header(cell) for cell in row}
        distinct.discard("")
        score = len(distinct)
        if not known_columns.isdisjoint(distinct):
            score += 10
        # Strict comparison keeps the earliest row on ties.
        if score > top_score:
            winner, top_score = position, score
    return winner if top_score >= 2 else None
147+
148+
149+
def _rows_to_sheet(name: str, rows: list[list[Any]]) -> ParsedSheet:
    """Turn raw rows into a ``ParsedSheet`` keyed by normalized headers.

    The header row is chosen by ``_find_header_row``; everything after it is
    data. Blank header cells are named ``unnamed_<column number>``, and repeated
    headers get a numeric suffix (``x``, ``x_2``, ``x_3``, ...). Data rows that
    are shorter than the header are padded with ``None``, cells beyond the last
    header are ignored, and rows whose values are all ``None``/"" are dropped.

    Args:
        name: Sheet name to record on the result.
        rows: Raw rows, each a list of cell values.

    Returns:
        The parsed sheet, or one with empty ``headers`` and ``rows`` when no
        header row was detected.
    """
    header_index = _find_header_row(rows)
    if header_index is None:
        return ParsedSheet(name=name, headers=[], rows=[])

    seen: dict[str, int] = {}
    taken: set[str] = set()
    unique_headers: list[str] = []
    for index, raw_header in enumerate(rows[header_index]):
        header = normalize_header(raw_header) or f"unnamed_{index + 1}"
        count = seen.get(header, 0) + 1
        candidate = header if count == 1 else f"{header}_{count}"
        # A suffixed name can clash with a header that is literally spelled
        # that way (e.g. "a", "a", "a_2"). Duplicate keys would make the row
        # dicts silently drop a column, so keep counting until the key is free.
        while candidate in taken:
            count += 1
            candidate = f"{header}_{count}"
        seen[header] = count
        taken.add(candidate)
        unique_headers.append(candidate)

    parsed_rows: list[dict[str, Any]] = []
    for row in rows[header_index + 1 :]:
        item = {
            header: row[index] if index < len(row) else None
            for index, header in enumerate(unique_headers)
        }
        if any(value not in (None, "") for value in item.values()):
            parsed_rows.append(item)
    return ParsedSheet(name=name, headers=unique_headers, rows=parsed_rows)
171+
172+
173+
def parse_workbook(filename: str, data: bytes) -> list[ParsedSheet]:
    """Parse CSV/XLSX data into normalized sheets.

    This function intentionally avoids optional Excel dependencies so deployed
    Bloom containers can accept simple operator workbooks without changing the
    runtime package set.

    Args:
        filename: Upload name; only its extension (case-insensitive) is used,
            to choose the parser.
        data: Raw file bytes.

    Returns:
        A single sheet named "CSV" for ``.csv`` input, or one sheet per
        worksheet listed in the workbook for ``.xlsx`` input.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``, if CSV
            bytes are not valid UTF-8 (``UnicodeDecodeError`` is a
            ``ValueError``), or if the content cannot be read as CSV or as an
            XLSX workbook.
    """
    suffix = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    if suffix == "csv":
        text = data.decode("utf-8-sig")
        try:
            # newline="" leaves line-ending handling to the csv module, as its
            # documentation requires; otherwise CR-only files fail to parse.
            csv_rows = list(csv.reader(io.StringIO(text, newline="")))
        except csv.Error as exc:
            raise ValueError("Spreadsheet upload is not a readable .csv file") from exc
        return [_rows_to_sheet("CSV", csv_rows)]
    if suffix != "xlsx":
        raise ValueError("Spreadsheet upload must be .csv or .xlsx")
    # NOTE(review): uploads are untrusted and nothing here caps decompressed
    # member size or XML expansion -- consider enforcing a size limit upstream.
    sheets: list[ParsedSheet] = []
    try:
        with zipfile.ZipFile(io.BytesIO(data)) as archive:
            shared_strings = _shared_strings(archive)
            for sheet_name, sheet_path in _sheet_paths(archive):
                rows = _parse_xlsx_sheet(archive, sheet_path, shared_strings)
                sheets.append(_rows_to_sheet(sheet_name, rows))
    except (zipfile.BadZipFile, KeyError, ElementTree.ParseError) as exc:
        # Not a zip, a missing workbook part, or broken XML: report all of
        # them the same way as the other bad-upload errors above.
        raise ValueError(
            "Spreadsheet upload is not a readable .xlsx workbook"
        ) from exc
    return sheets

0 commit comments

Comments
 (0)