From 90c0e7d52d6d9ea005dde87853918a91654010f6 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Fri, 8 May 2026 12:07:22 -0400 Subject: [PATCH 1/3] Add best_direction() helper for canonical (kind, field) directions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #211. Surfaces the canonical "best direction" of a prediction field (max or min) as a queryable API alongside Kind / kind_support. Half of this was already implicit in BindingPredictionCollection's _best_by_score (max) and _best_by_rank (min); the new module-level ``best_direction(kind, field)`` makes the convention explicit and extends it to ``value``, whose direction is kind-dependent (IC50 nM for pMHC_affinity → min; half-life for pMHC_stability → max). - ``FIELD_BEST_DIRECTIONS``: ``{"score": "max", "percentile_rank": "min"}`` — uniform across kinds. - ``VALUE_BEST_DIRECTIONS``: ``{Kind.pMHC_affinity: "min", Kind.pMHC_stability: "max"}`` — extend per kind. - ``best_direction(kind, field)``: lookup helper that raises ValueError for ``field='value'`` on an unregistered kind, with a pointer to the table. Downstream consumers (topiary's BestAlleleField; vaxrank tier signals) can drop their local direction tables and import from mhctools instead. 7 new tests; full suite green. Bump to 3.14.0 (additive minor). --- mhctools/__init__.py | 8 +++++- mhctools/pred.py | 66 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_pred.py | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/mhctools/__init__.py b/mhctools/__init__.py index 29ef8ff..61897f5 100644 --- a/mhctools/__init__.py +++ b/mhctools/__init__.py @@ -1,6 +1,7 @@ from .binding_prediction import BindingPrediction from .binding_prediction_collection import BindingPredictionCollection from .pred import ( + FIELD_BEST_DIRECTIONS, Kind, MHC_CLASS_VALUES, MHC_DEPENDENCE_VALUES, @@ -8,6 +9,8 @@ PeptideResult, Pred, Prediction, + VALUE_BEST_DIRECTIONS, + best_direction, preds_from_rows, ) from .sample import MultiSample @@ -72,7 +75,7 @@ def __getattr__(name): raise AttributeError( "module %r has no attribute %r" % (__name__, name)) -__version__ = "3.13.7" +__version__ = "3.14.0" __all__ = [ "Prediction", @@ -82,6 +85,9 @@ def __getattr__(name): "Kind", "MHC_CLASS_VALUES", "MHC_DEPENDENCE_VALUES", + "FIELD_BEST_DIRECTIONS", + "VALUE_BEST_DIRECTIONS", + "best_direction", "preds_from_rows", "MultiSample", "BindingPrediction", diff --git a/mhctools/pred.py b/mhctools/pred.py index 36abfdc..1a872b7 100644 --- a/mhctools/pred.py +++ b/mhctools/pred.py @@ -53,6 +53,72 @@ class Kind: erap_trimming = "erap_trimming" +# Canonical "best direction" for each prediction field. Used by +# downstream aggregators (e.g. "best across alleles" or "best across +# methods") and by the :class:`PeptideResult` ``.best_*`` accessors. +# +# - ``score``: every kind that uses score normalizes higher = better +# (binding strength, presentation likelihood, immunogenicity, ...). +# - ``percentile_rank``: 0 means best, smaller is better, every kind. +# - ``value``: kind-dependent — see :data:`VALUE_BEST_DIRECTIONS`. +FIELD_BEST_DIRECTIONS = { + "score": "max", + "percentile_rank": "min", +} + +# Per-kind direction for ``value``. ``value`` carries the predictor's +# raw output unit (IC50 nM for affinity, half-life for stability, ...); +# whether higher or lower is "better" depends on what the unit means. +# Add an entry when introducing a new ``value``-bearing kind. +VALUE_BEST_DIRECTIONS = { + Kind.pMHC_affinity: "min", # IC50 nM + Kind.pMHC_stability: "max", # half-life +} + + +def best_direction(kind, field) -> str: + """Canonical "best" direction for a ``(kind, field)`` pair. + + Returns ``"max"`` or ``"min"``. + + Parameters + ---------- + kind : str or Kind + Prediction kind (e.g. ``Kind.pMHC_affinity``). + field : str + Column name within the kind's predictions — + ``"score"``, ``"percentile_rank"``, or ``"value"``. + + Raises + ------ + ValueError + If ``field`` is unknown, or if ``field == "value"`` for a kind + without a registered direction in :data:`VALUE_BEST_DIRECTIONS`. + + Notes + ----- + ``score`` and ``percentile_rank`` directions are uniform across all + kinds. ``value`` is kind-dependent: e.g. ``pMHC_affinity`` reports + IC50 in nM (lower better) while ``pMHC_stability`` reports half-life + (higher better). + """ + direction = FIELD_BEST_DIRECTIONS.get(field) + if direction is not None: + return direction + if field == "value": + if kind not in VALUE_BEST_DIRECTIONS: + raise ValueError( + f"best_direction undefined for ({kind!r}, 'value') — " + f"`value` semantics depend on the kind. Add an entry " + f"to mhctools.pred.VALUE_BEST_DIRECTIONS." + ) + return VALUE_BEST_DIRECTIONS[kind] + known = sorted(FIELD_BEST_DIRECTIONS) + ["value"] + raise ValueError( + f"best_direction undefined for field {field!r}. Known: {known}." + ) + + COLUMNS = ( "sample_name", "peptide", diff --git a/tests/test_pred.py b/tests/test_pred.py index 9d486a0..fe8b132 100644 --- a/tests/test_pred.py +++ b/tests/test_pred.py @@ -10,13 +10,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + from mhctools.pred import ( COLUMNS, + FIELD_BEST_DIRECTIONS, Kind, MHC_CLASS_VALUES, MHC_DEPENDENCE_VALUES, PeptideResult, Prediction, + VALUE_BEST_DIRECTIONS, + best_direction, preds_from_rows, ) from mhctools.sample import MultiSample @@ -572,3 +577,53 @@ def test_collection_to_peptide_preds(): assert len(pp_list) == 2 # two distinct peptide positions sizes = sorted(len(pp.preds) for pp in pp_list) assert sizes == [1, 2] + + +# -- best_direction -- + + +def test_field_best_directions_constants(): + """score is max-better, percentile_rank is min-better — uniform across kinds.""" + assert FIELD_BEST_DIRECTIONS["score"] == "max" + assert FIELD_BEST_DIRECTIONS["percentile_rank"] == "min" + + +def test_value_best_directions_kind_specific(): + """value direction is kind-dependent; affinity uses IC50 (min-better).""" + assert VALUE_BEST_DIRECTIONS[Kind.pMHC_affinity] == "min" + assert VALUE_BEST_DIRECTIONS[Kind.pMHC_stability] == "max" + + +def test_best_direction_field_defaults(): + # score is max for any kind that uses it + assert best_direction(Kind.pMHC_affinity, "score") == "max" + assert best_direction(Kind.pMHC_presentation, "score") == "max" + assert best_direction(Kind.proteasome_cleavage, "score") == "max" + # percentile_rank is min for any kind that uses it + assert best_direction(Kind.pMHC_affinity, "percentile_rank") == "min" + assert best_direction(Kind.pMHC_presentation, "percentile_rank") == "min" + + +def test_best_direction_value_kind_specific(): + assert best_direction(Kind.pMHC_affinity, "value") == "min" # IC50 + assert best_direction(Kind.pMHC_stability, "value") == "max" # half-life + + +def test_best_direction_value_unregistered_raises(): + """`value` for a kind without a registered direction must raise — + we refuse to guess the unit semantics.""" + with pytest.raises(ValueError, match="best_direction undefined"): + best_direction(Kind.pMHC_presentation, "value") + with pytest.raises(ValueError, match="best_direction undefined"): + best_direction(Kind.proteasome_cleavage, "value") + + +def test_best_direction_unknown_field_raises(): + with pytest.raises(ValueError, match="best_direction undefined for field"): + best_direction(Kind.pMHC_affinity, "made_up_field") + + +def test_best_direction_accepts_string_kind(): + # Kind constants are plain strings, so callers can pass either form. + assert best_direction("pMHC_affinity", "value") == "min" + assert best_direction("pMHC_affinity", "score") == "max" From df14d804b5ea41c929cadda22e1b21085ca43e61 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Fri, 8 May 2026 17:36:37 -0400 Subject: [PATCH 2/3] Promote best_by methods to public API on PeptideResult Adds a single direction-aware best_by(kind, field) that uses best_direction() for max/min, plus thin best_by_score / best_by_rank / best_by_value wrappers. The existing kind accessors (affinity, presentation, best_affinity_by_rank, ...) now route through the public methods. Behavior changes: - best_by_value(kind) raises ValueError for kinds without a registered value direction (consistent with best_direction()), since `value` semantics are kind-dependent. - The no-allele fallback that used to live in _best_by_score is now uniform across fields: prefer predictions with an allele, fall back to allele-less if none have an allele, skip None values. 8 new tests; full suite green. Bump to 3.14.1. --- mhctools/__init__.py | 2 +- mhctools/pred.py | 73 +++++++++++++++++++++++++++++++------------- tests/test_pred.py | 70 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/mhctools/__init__.py b/mhctools/__init__.py index 61897f5..5956735 100644 --- a/mhctools/__init__.py +++ b/mhctools/__init__.py @@ -75,7 +75,7 @@ def __getattr__(name): raise AttributeError( "module %r has no attribute %r" % (__name__, name)) -__version__ = "3.14.0" +__version__ = "3.14.1" __all__ = [ "Prediction", diff --git a/mhctools/pred.py b/mhctools/pred.py index 1a872b7..8c82f75 100644 --- a/mhctools/pred.py +++ b/mhctools/pred.py @@ -243,27 +243,27 @@ def alleles(self) -> set: @property def affinity(self) -> Optional[Prediction]: """Best affinity prediction, or None.""" - return self._best_by_score(Kind.pMHC_affinity) + return self.best_by_score(Kind.pMHC_affinity) @property def presentation(self) -> Optional[Prediction]: """Best presentation prediction, or None.""" - return self._best_by_score(Kind.pMHC_presentation) + return self.best_by_score(Kind.pMHC_presentation) @property def stability(self) -> Optional[Prediction]: """Best stability prediction, or None.""" - return self._best_by_score(Kind.pMHC_stability) + return self.best_by_score(Kind.pMHC_stability) @property def immunogenicity(self) -> Optional[Prediction]: """Best immunogenicity prediction, or None.""" - return self._best_by_score(Kind.immunogenicity) + return self.best_by_score(Kind.immunogenicity) @property def cleavage(self) -> Optional[Prediction]: """Best proteasomal cleavage prediction, or None.""" - return self._best_by_score(Kind.proteasome_cleavage) + return self.best_by_score(Kind.proteasome_cleavage) # backward compat aliases @property @@ -282,15 +282,15 @@ def best_stability(self) -> Optional[Prediction]: @property def best_affinity_by_rank(self) -> Optional[Prediction]: - return self._best_by_rank(Kind.pMHC_affinity) + return self.best_by_rank(Kind.pMHC_affinity) @property def best_presentation_by_rank(self) -> Optional[Prediction]: - return self._best_by_rank(Kind.pMHC_presentation) + return self.best_by_rank(Kind.pMHC_presentation) @property def best_stability_by_rank(self) -> Optional[Prediction]: - return self._best_by_rank(Kind.pMHC_stability) + return self.best_by_rank(Kind.pMHC_stability) # --- filtering --- @@ -319,20 +319,49 @@ def to_dataframe(self, sample_name=""): return pd.DataFrame(columns=COLUMNS) return pd.DataFrame(rows, columns=COLUMNS) - # --- internals --- - - def _best_by_score(self, kind) -> Optional[Prediction]: - candidates = [p for p in self.preds if p.kind == kind and p.allele] - if not candidates: - # Fall back to preds without allele (e.g. processing predictors) - candidates = [p for p in self.preds if p.kind == kind] - return max(candidates, key=lambda p: p.score) if candidates else None - - def _best_by_rank(self, kind) -> Optional[Prediction]: - candidates = [p for p in self.preds - if p.kind == kind and p.allele - and p.percentile_rank is not None] - return min(candidates, key=lambda p: p.percentile_rank) if candidates else None + # --- best_by (public, direction-aware) --- + + def best_by(self, kind, field) -> Optional[Prediction]: + """Return the prediction of ``kind`` with the best ``field``, or None. + + "Best" direction comes from :func:`best_direction` — + ``score`` is max-better, ``percentile_rank`` is min-better, and + ``value`` is kind-dependent (IC50 lower-better, half-life higher-better). + + Predictions with ``None`` for ``field`` are skipped. Predictions + with an allele are preferred; if none of the matching kind have an + allele, falls back to allele-less predictions (e.g. processing + predictors that emit allele-independent scores). + """ + direction = best_direction(kind, field) + op = max if direction == "max" else min + + def has_value(p): + return getattr(p, field) is not None + + with_allele = [p for p in self.preds + if p.kind == kind and p.allele and has_value(p)] + if with_allele: + return op(with_allele, key=lambda p: getattr(p, field)) + without_allele = [p for p in self.preds + if p.kind == kind and has_value(p)] + if without_allele: + return op(without_allele, key=lambda p: getattr(p, field)) + return None + + def best_by_score(self, kind) -> Optional[Prediction]: + """Best prediction of ``kind`` by ``score`` (max-better).""" + return self.best_by(kind, "score") + + def best_by_rank(self, kind) -> Optional[Prediction]: + """Best prediction of ``kind`` by ``percentile_rank`` (min-better).""" + return self.best_by(kind, "percentile_rank") + + def best_by_value(self, kind) -> Optional[Prediction]: + """Best prediction of ``kind`` by ``value``. Direction is kind-specific + (see :data:`VALUE_BEST_DIRECTIONS`). Raises ``ValueError`` for kinds + without a registered ``value`` direction.""" + return self.best_by(kind, "value") def preds_from_rows(rows, **shared): diff --git a/tests/test_pred.py b/tests/test_pred.py index fe8b132..cdcda76 100644 --- a/tests/test_pred.py +++ b/tests/test_pred.py @@ -627,3 +627,73 @@ def test_best_direction_accepts_string_kind(): # Kind constants are plain strings, so callers can pass either form. assert best_direction("pMHC_affinity", "value") == "min" assert best_direction("pMHC_affinity", "score") == "max" + + +# -- best_by (public, direction-aware) -- + + +def test_best_by_score_picks_max(): + ps = _make_pred_set() + best = ps.best_by_score(Kind.pMHC_affinity) + assert best.allele == "HLA-A*02:01" + assert best.score == 0.85 + + +def test_best_by_rank_picks_min(): + ps = _make_pred_set() + best = ps.best_by_rank(Kind.pMHC_affinity) + assert best.allele == "HLA-A*02:01" + assert best.percentile_rank == 0.8 + + +def test_best_by_value_affinity_picks_min_ic50(): + ps = _make_pred_set() + best = ps.best_by_value(Kind.pMHC_affinity) + assert best.allele == "HLA-A*02:01" + assert best.value == 120.5 + + +def test_best_by_value_stability_picks_max_half_life(): + ps = preds_from_rows( + [ + dict(kind=Kind.pMHC_stability, allele="HLA-A*02:01", + score=0.6, value=4.0), + dict(kind=Kind.pMHC_stability, allele="HLA-B*07:02", + score=0.9, value=12.0), + ], + peptide="SIINFEKL", + ) + best = ps.best_by_value(Kind.pMHC_stability) + assert best.allele == "HLA-B*07:02" + assert best.value == 12.0 + + +def test_best_by_value_unregistered_kind_raises(): + ps = _make_pred_set() + with pytest.raises(ValueError, match="best_direction undefined"): + ps.best_by_value(Kind.pMHC_presentation) + + +def test_best_by_skips_none_field(): + # antigen_processing pred has score but None for percentile_rank + ps = _make_pred_set() + assert ps.best_by_rank(Kind.antigen_processing) is None + assert ps.best_by_score(Kind.antigen_processing) is not None + + +def test_best_by_falls_back_to_allele_less(): + ps = preds_from_rows( + [dict(kind=Kind.proteasome_cleavage, score=0.7), + dict(kind=Kind.proteasome_cleavage, score=0.9)], + peptide="SIINFEKL", + ) + best = ps.best_by_score(Kind.proteasome_cleavage) + assert best is not None + assert best.score == 0.9 + assert best.allele == "" + + +def test_best_by_missing_kind_returns_none(): + ps = _make_pred_set() + assert ps.best_by_score(Kind.immunogenicity) is None + assert ps.best_by_rank(Kind.pMHC_stability) is None From 9cdb8cb261d396ebde9a27f5b54abbdc7799a7ec Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Fri, 8 May 2026 19:03:14 -0400 Subject: [PATCH 3/3] Lock new best_by contract: string-kind input + rank no-allele fallback Two completeness tests: - best_by accepts string-form kinds (mirrors the existing best_direction string-kind test). - best_*_by_rank now returns allele-less rank predictions when no allele-bearing rank pred exists. This is a behavior change vs the old _best_by_rank (which had no fallback) introduced when best_by was unified across fields. Pinning so it can't silently regress. --- tests/test_pred.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_pred.py b/tests/test_pred.py index cdcda76..dc809e1 100644 --- a/tests/test_pred.py +++ b/tests/test_pred.py @@ -697,3 +697,29 @@ def test_best_by_missing_kind_returns_none(): ps = _make_pred_set() assert ps.best_by_score(Kind.immunogenicity) is None assert ps.best_by_rank(Kind.pMHC_stability) is None + + +def test_best_by_accepts_string_kind(): + # Kind constants are plain strings, so callers can pass either form. + ps = _make_pred_set() + by_kind = ps.best_by_score(Kind.pMHC_affinity) + by_str = ps.best_by_score("pMHC_affinity") + assert by_kind == by_str + assert ps.best_by("pMHC_affinity", "value") == ps.best_by_value(Kind.pMHC_affinity) + + +def test_best_by_rank_falls_back_to_allele_less(): + # Behavior change vs the old _best_by_rank: rank predictions without an + # allele are now considered when no allele-bearing rank pred exists. Pin + # the new contract so it can't silently regress. + ps = preds_from_rows( + [ + dict(kind=Kind.pMHC_presentation, score=0.5, percentile_rank=10.0), + dict(kind=Kind.pMHC_presentation, score=0.8, percentile_rank=2.0), + ], + peptide="SIINFEKL", + ) + best = ps.best_presentation_by_rank + assert best is not None + assert best.allele == "" + assert best.percentile_rank == 2.0