Skip to content

Commit 78cc799

Browse files
Update version to 0.18.1 and enhance public API formatting (#68)
- Bump package version to 0.18.1.
- Refactor `__all__` in `__init__.py` into a multi-line list for improved readability.
- Update the gene ID regex in `validation.py` to also accept versioned gene IDs (an optional `.N` suffix after the 11-digit identifier).
- Modify `_id_like_percentage` to draw a seeded random sample of up to 2000 values, instead of taking the first 500, for a more representative estimate.
1 parent 27ae521 commit 78cc799

2 files changed

Lines changed: 17 additions & 6 deletions

File tree

cytetype/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.18.0"
1+
__version__ = "0.18.1"
22

33
import requests
44

@@ -8,7 +8,12 @@
88
from .preprocessing.marker_detection import rank_genes_groups_backed
99
from .preprocessing.subsampling import subsample_by_group
1010

11-
__all__ = ["CyteType", "marker_dotplot", "rank_genes_groups_backed", "subsample_by_group"]
11+
__all__ = [
12+
"CyteType",
13+
"marker_dotplot",
14+
"rank_genes_groups_backed",
15+
"subsample_by_group",
16+
]
1217

1318
_PYPI_JSON_URL = "https://pypi.org/pypi/cytetype/json"
1419

cytetype/preprocessing/validation.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import random
12
import re
3+
24
import anndata
35

46
from ..config import logger
@@ -19,7 +21,7 @@ def _is_gene_id_like(value: str) -> bool:
1921

2022
value = value.strip()
2123

22-
if re.match(r"^ENS[A-Z]*G\d{11}$", value, re.IGNORECASE):
24+
if re.match(r"^ENS[A-Z]*G\d{11}(\.\d+)?$", value, re.IGNORECASE):
2325
return True
2426

2527
if re.match(r"^[NX][MR]_\d+$", value):
@@ -75,11 +77,15 @@ def clean_gene_names(names: list[str]) -> list[str]:
7577
return cleaned
7678

7779

78-
def _id_like_percentage(values: list[str]) -> float:
80+
def _id_like_percentage(values: list[str], seed: int = 42) -> float:
7981
if not values:
8082
return 100.0
81-
n = min(500, len(values))
82-
sample = values[:n]
83+
n = min(2000, len(values))
84+
if n < len(values):
85+
rng = random.Random(seed)
86+
sample = rng.sample(values, n)
87+
else:
88+
sample = values
8389
return sum(1 for v in sample if _is_gene_id_like(v)) / n * 100
8490

8591

0 commit comments

Comments
 (0)