Skip to content

Commit 9a8fcf4

Browse files
Obsm in duckdb (#66)
* increased annotate timeout to 180 * coordinates in duckdb * version bump * don't offset cluster ids; use as-is if short values
1 parent fab6711 commit 9a8fcf4

4 files changed

Lines changed: 25 additions & 4 deletions

File tree

cytetype/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.16.1"
1+
__version__ = "0.17.0"
22

33
import requests
44

cytetype/api/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def submit_annotation_job(
176176
transport = HTTPTransport(base_url, auth_token)
177177

178178
try:
179-
status_code, response = transport.post("annotate", payload, timeout=60)
179+
status_code, response = transport.post("annotate", payload, timeout=180)
180180
job_id = response.get("job_id")
181181

182182
if not job_id:

cytetype/core/artifacts.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ def save_features_matrix(
197197
def save_obs_duckdb(
198198
out_file: str,
199199
obs_df: pd.DataFrame,
200+
obsm_coordinates: np.ndarray | None = None,
201+
coordinates_key: str | None = None,
200202
table_name: str = "obs",
201203
threads: int = 4,
202204
memory_limit: str = "4GB",
@@ -208,6 +210,11 @@ def save_obs_duckdb(
208210
"Invalid table_name. Use letters, numbers, and underscores only."
209211
)
210212

213+
if obsm_coordinates is not None and coordinates_key is not None:
214+
obs_df = obs_df.copy()
215+
obs_df[f"__vis_coordinates_{coordinates_key}_1"] = obsm_coordinates[:, 0]
216+
obs_df[f"__vis_coordinates_{coordinates_key}_2"] = obsm_coordinates[:, 1]
217+
211218
dd_config: dict[str, Any] = {
212219
"threads": threads,
213220
"memory_limit": memory_limit,

cytetype/main.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,14 @@ def __init__(
132132
adata, group_key, rank_key, gene_symbols_column, coordinates_key
133133
)
134134

135+
# Use original labels as IDs if all are short (<=3 chars), otherwise enumerate
136+
_unique_group_categories: list[str | int] = natsorted(
137+
adata.obs[group_key].unique().tolist()
138+
)
139+
_short_ids = all(len(str(x)) <= 3 for x in _unique_group_categories)
135140
self.cluster_map = {
136-
str(x): str(n + 1)
137-
for n, x in enumerate(natsorted(adata.obs[group_key].unique().tolist()))
141+
str(x): str(x) if _short_ids else str(n)
142+
for n, x in enumerate(_unique_group_categories)
138143
}
139144
self.clusters = [
140145
self.cluster_map[str(x)] for x in adata.obs[group_key].values.tolist()
@@ -199,6 +204,7 @@ def _build_and_upload_artifacts(
199204
obs_duckdb_path: str,
200205
upload_timeout_seconds: int,
201206
upload_max_workers: int = 4,
207+
coordinates_key: str | None = None,
202208
) -> tuple[dict[str, str], list[tuple[str, Exception]]]:
203209
"""Build and upload each artifact as an independent unit.
204210
@@ -240,9 +246,16 @@ def _build_and_upload_artifacts(
240246
# --- obs.duckdb (save then upload) ---
241247
try:
242248
logger.info("Saving obs.duckdb artifact from observation metadata...")
249+
obsm_coordinates = (
250+
self.adata.obsm[coordinates_key]
251+
if coordinates_key and coordinates_key in self.adata.obsm
252+
else None
253+
)
243254
save_obs_duckdb_file(
244255
out_file=obs_duckdb_path,
245256
obs_df=self.adata.obs,
257+
obsm_coordinates=obsm_coordinates,
258+
coordinates_key=coordinates_key,
246259
)
247260
logger.info("Uploading obs.duckdb artifact...")
248261
obs_upload = upload_obs_duckdb_file(
@@ -394,6 +407,7 @@ def run(
394407
obs_duckdb_path=obs_duckdb_path,
395408
upload_timeout_seconds=upload_timeout_seconds,
396409
upload_max_workers=upload_max_workers,
410+
coordinates_key=self.coordinates_key,
397411
)
398412
if uploaded_file_refs:
399413
payload["uploaded_files"] = uploaded_file_refs

0 commit comments

Comments
 (0)