From b73fd77e6500a3d42b542c05f1767fb3f931ec0d Mon Sep 17 00:00:00 2001 From: Elihei2 Date: Mon, 1 Jun 2026 13:44:30 +0200 Subject: [PATCH] fix(io): make the Xenium QV threshold configurable (default 20) The Xenium reader hard-coded `.filter(qv >= 20)`. Promote it to a `min_qv` preprocessor parameter (threaded through get_preprocessor) with a per-platform DEFAULT_MIN_QV (20.0 for Xenium, None elsewhere). Default reproduces today's behaviour exactly; callers can now relax/tighten the QV filter, or effectively disable it with `min_qv=0` (passing `min_qv=None` selects the platform default rather than disabling the filter). Review: ISTPreprocessor.__init__ + get_preprocessor signature; the one Xenium filter line now reads self.min_qv. --- src/segger/io/preprocessor.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/segger/io/preprocessor.py b/src/segger/io/preprocessor.py index 9bbe62d..24ad48f 100644 --- a/src/segger/io/preprocessor.py +++ b/src/segger/io/preprocessor.py @@ -63,16 +63,22 @@ class ISTPreprocessor(ABC): transcript and boundary GeoDataFrames for the given platform. """ - def __init__(self, data_dir: Path): + DEFAULT_MIN_QV: float | None = None + + def __init__(self, data_dir: Path, min_qv: float | None = None): """ Parameters ---------- data_dir : Path Path to the raw data directory for the spatial platform. + min_qv : float, optional + Minimum transcript quality to keep. Defaults to the platform's + ``DEFAULT_MIN_QV`` (None = no quality filter). 
""" data_dir = Path(data_dir) type(self)._validate_directory(data_dir) self.data_dir = data_dir + self.min_qv = type(self).DEFAULT_MIN_QV if min_qv is None else min_qv @staticmethod @abstractmethod @@ -352,6 +358,7 @@ class XeniumPreprocessor(ISTPreprocessor): tx_fields = XeniumTranscriptFields() bd_fields = XeniumBoundaryFields() sw_version = lambda version: version[0] > 1 + DEFAULT_MIN_QV: float = 20.0 @staticmethod def _get_analysis_sw_version(data_dir: Path) -> str: @@ -419,7 +426,7 @@ def transcripts(self) -> pl.DataFrame: pl.col(raw.cell_id).cast(pl.Utf8), ) # Filter data - .filter(pl.col(raw.quality) >= 20) + .filter(pl.col(raw.quality) >= self.min_qv) .filter(pl.col(raw.feature).str.contains( '|'.join(raw.filter_substrings)).not_() ) @@ -564,7 +571,8 @@ def _infer_platform(data_dir: Path) -> str: def get_preprocessor( data_dir: Path, - platform: str | None = None + platform: str | None = None, + min_qv: float | None = None, ) -> ISTPreprocessor: data_dir = Path(data_dir) if platform is None: @@ -575,4 +583,4 @@ def get_preprocessor( f"Available: {list(PREPROCESSORS)}" ) cls = PREPROCESSORS[platform.lower()] - return cls(data_dir) + return cls(data_dir, min_qv=min_qv)