Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions src/segger/io/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,22 @@ class ISTPreprocessor(ABC):
transcript and boundary GeoDataFrames for the given platform.
"""

def __init__(self, data_dir: Path):
DEFAULT_MIN_QV: float | None = None

def __init__(self, data_dir: Path, min_qv: float | None = None):
    """
    Validate the raw data directory and record the quality threshold.

    Parameters
    ----------
    data_dir : Path
        Path to the raw data directory for the spatial platform.
    min_qv : float, optional
        Minimum transcript quality to keep. When omitted (or ``None``),
        the class-level ``DEFAULT_MIN_QV`` is used; a class default of
        ``None`` means no quality filter.
    """
    cls = type(self)
    root = Path(data_dir)
    # Validation runs before any attribute is assigned on the instance.
    cls._validate_directory(root)
    self.data_dir = root
    # ``None`` means "not specified": it selects the class default and
    # therefore cannot be used to override a non-None platform default.
    if min_qv is None:
        min_qv = cls.DEFAULT_MIN_QV
    self.min_qv = min_qv

@staticmethod
@abstractmethod
Expand Down Expand Up @@ -352,6 +358,7 @@ class XeniumPreprocessor(ISTPreprocessor):
tx_fields = XeniumTranscriptFields()
bd_fields = XeniumBoundaryFields()
sw_version = lambda version: version[0] > 1
DEFAULT_MIN_QV: float = 20.0

@staticmethod
def _get_analysis_sw_version(data_dir: Path) -> str:
Expand Down Expand Up @@ -419,7 +426,7 @@ def transcripts(self) -> pl.DataFrame:
pl.col(raw.cell_id).cast(pl.Utf8),
)
# Filter data
.filter(pl.col(raw.quality) >= 20)
.filter(pl.col(raw.quality) >= self.min_qv)
.filter(pl.col(raw.feature).str.contains(
'|'.join(raw.filter_substrings)).not_()
)
Expand Down Expand Up @@ -564,7 +571,8 @@ def _infer_platform(data_dir: Path) -> str:

def get_preprocessor(
data_dir: Path,
platform: str | None = None
platform: str | None = None,
min_qv: float | None = None,
) -> ISTPreprocessor:
data_dir = Path(data_dir)
if platform is None:
Expand All @@ -575,4 +583,4 @@ def get_preprocessor(
f"Available: {list(PREPROCESSORS)}"
)
cls = PREPROCESSORS[platform.lower()]
return cls(data_dir)
return cls(data_dir, min_qv=min_qv)