diff --git a/src/segger/io/preprocessor.py b/src/segger/io/preprocessor.py index 9bbe62d..24ad48f 100644 --- a/src/segger/io/preprocessor.py +++ b/src/segger/io/preprocessor.py @@ -63,16 +63,22 @@ class ISTPreprocessor(ABC): transcript and boundary GeoDataFrames for the given platform. """ - def __init__(self, data_dir: Path): + DEFAULT_MIN_QV: float | None = None + + def __init__(self, data_dir: Path, min_qv: float | None = None): """ Parameters ---------- data_dir : Path Path to the raw data directory for the spatial platform. + min_qv : float, optional + Minimum transcript quality to keep. Defaults to the platform's + ``DEFAULT_MIN_QV`` (None = no quality filter). """ data_dir = Path(data_dir) type(self)._validate_directory(data_dir) self.data_dir = data_dir + self.min_qv = type(self).DEFAULT_MIN_QV if min_qv is None else min_qv @staticmethod @abstractmethod @@ -352,6 +358,7 @@ class XeniumPreprocessor(ISTPreprocessor): tx_fields = XeniumTranscriptFields() bd_fields = XeniumBoundaryFields() sw_version = lambda version: version[0] > 1 + DEFAULT_MIN_QV: float = 20.0 @staticmethod def _get_analysis_sw_version(data_dir: Path) -> str: @@ -419,7 +426,7 @@ def transcripts(self) -> pl.DataFrame: pl.col(raw.cell_id).cast(pl.Utf8), ) # Filter data - .filter(pl.col(raw.quality) >= 20) + .filter(pl.col(raw.quality) >= self.min_qv) .filter(pl.col(raw.feature).str.contains( '|'.join(raw.filter_substrings)).not_() ) @@ -564,7 +571,8 @@ def _infer_platform(data_dir: Path) -> str: def get_preprocessor( data_dir: Path, - platform: str | None = None + platform: str | None = None, + min_qv: float | None = None, ) -> ISTPreprocessor: data_dir = Path(data_dir) if platform is None: @@ -575,4 +583,4 @@ def get_preprocessor( f"Available: {list(PREPROCESSORS)}" ) cls = PREPROCESSORS[platform.lower()] - return cls(data_dir) + return cls(data_dir, min_qv=min_qv)