Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion mhctools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from .binding_prediction import BindingPrediction
from .binding_prediction_collection import BindingPredictionCollection
from .pred import (
FIELD_BEST_DIRECTIONS,
Kind,
MHC_CLASS_VALUES,
MHC_DEPENDENCE_VALUES,
PeptidePreds,
PeptideResult,
Pred,
Prediction,
VALUE_BEST_DIRECTIONS,
best_direction,
preds_from_rows,
)
from .sample import MultiSample
Expand Down Expand Up @@ -72,7 +75,7 @@ def __getattr__(name):
raise AttributeError(
"module %r has no attribute %r" % (__name__, name))

__version__ = "3.13.7"
__version__ = "3.14.1"

__all__ = [
"Prediction",
Expand All @@ -82,6 +85,9 @@ def __getattr__(name):
"Kind",
"MHC_CLASS_VALUES",
"MHC_DEPENDENCE_VALUES",
"FIELD_BEST_DIRECTIONS",
"VALUE_BEST_DIRECTIONS",
"best_direction",
"preds_from_rows",
"MultiSample",
"BindingPrediction",
Expand Down
139 changes: 117 additions & 22 deletions mhctools/pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,72 @@ class Kind:
erap_trimming = "erap_trimming"


# Canonical "best direction" for each prediction field. Used by
# downstream aggregators (e.g. "best across alleles" or "best across
# methods") and by the :class:`PeptideResult` ``.best_*`` accessors.
#
# - ``score``: every kind that uses score normalizes higher = better
# (binding strength, presentation likelihood, immunogenicity, ...).
# - ``percentile_rank``: 0 means best, smaller is better, every kind.
# - ``value``: kind-dependent — see :data:`VALUE_BEST_DIRECTIONS`.
FIELD_BEST_DIRECTIONS = {
"score": "max",
"percentile_rank": "min",
}

# Per-kind direction for ``value``. ``value`` carries the predictor's
# raw output unit (IC50 nM for affinity, half-life for stability, ...);
# whether higher or lower is "better" depends on what the unit means.
# Add an entry when introducing a new ``value``-bearing kind.
VALUE_BEST_DIRECTIONS = {
Kind.pMHC_affinity: "min", # IC50 nM
Kind.pMHC_stability: "max", # half-life
}


def best_direction(kind, field) -> str:
"""Canonical "best" direction for a ``(kind, field)`` pair.

Returns ``"max"`` or ``"min"``.

Parameters
----------
kind : str or Kind
Prediction kind (e.g. ``Kind.pMHC_affinity``).
field : str
Column name within the kind's predictions —
``"score"``, ``"percentile_rank"``, or ``"value"``.

Raises
------
ValueError
If ``field`` is unknown, or if ``field == "value"`` for a kind
without a registered direction in :data:`VALUE_BEST_DIRECTIONS`.

Notes
-----
``score`` and ``percentile_rank`` directions are uniform across all
kinds. ``value`` is kind-dependent: e.g. ``pMHC_affinity`` reports
IC50 in nM (lower better) while ``pMHC_stability`` reports half-life
(higher better).
"""
direction = FIELD_BEST_DIRECTIONS.get(field)
if direction is not None:
return direction
if field == "value":
if kind not in VALUE_BEST_DIRECTIONS:
raise ValueError(
f"best_direction undefined for ({kind!r}, 'value') — "
f"`value` semantics depend on the kind. Add an entry "
f"to mhctools.pred.VALUE_BEST_DIRECTIONS."
)
return VALUE_BEST_DIRECTIONS[kind]
known = sorted(FIELD_BEST_DIRECTIONS) + ["value"]
raise ValueError(
f"best_direction undefined for field {field!r}. Known: {known}."
)


COLUMNS = (
"sample_name",
"peptide",
Expand Down Expand Up @@ -177,27 +243,27 @@ def alleles(self) -> set:
@property
def affinity(self) -> Optional[Prediction]:
"""Best affinity prediction, or None."""
return self._best_by_score(Kind.pMHC_affinity)
return self.best_by_score(Kind.pMHC_affinity)

@property
def presentation(self) -> Optional[Prediction]:
"""Best presentation prediction, or None."""
return self._best_by_score(Kind.pMHC_presentation)
return self.best_by_score(Kind.pMHC_presentation)

@property
def stability(self) -> Optional[Prediction]:
"""Best stability prediction, or None."""
return self._best_by_score(Kind.pMHC_stability)
return self.best_by_score(Kind.pMHC_stability)

@property
def immunogenicity(self) -> Optional[Prediction]:
"""Best immunogenicity prediction, or None."""
return self._best_by_score(Kind.immunogenicity)
return self.best_by_score(Kind.immunogenicity)

@property
def cleavage(self) -> Optional[Prediction]:
"""Best proteasomal cleavage prediction, or None."""
return self._best_by_score(Kind.proteasome_cleavage)
return self.best_by_score(Kind.proteasome_cleavage)

# backward compat aliases
@property
Expand All @@ -216,15 +282,15 @@ def best_stability(self) -> Optional[Prediction]:

@property
def best_affinity_by_rank(self) -> Optional[Prediction]:
return self._best_by_rank(Kind.pMHC_affinity)
return self.best_by_rank(Kind.pMHC_affinity)

@property
def best_presentation_by_rank(self) -> Optional[Prediction]:
return self._best_by_rank(Kind.pMHC_presentation)
return self.best_by_rank(Kind.pMHC_presentation)

@property
def best_stability_by_rank(self) -> Optional[Prediction]:
return self._best_by_rank(Kind.pMHC_stability)
return self.best_by_rank(Kind.pMHC_stability)

# --- filtering ---

Expand Down Expand Up @@ -253,20 +319,49 @@ def to_dataframe(self, sample_name=""):
return pd.DataFrame(columns=COLUMNS)
return pd.DataFrame(rows, columns=COLUMNS)

# --- internals ---

def _best_by_score(self, kind) -> Optional[Prediction]:
candidates = [p for p in self.preds if p.kind == kind and p.allele]
if not candidates:
# Fall back to preds without allele (e.g. processing predictors)
candidates = [p for p in self.preds if p.kind == kind]
return max(candidates, key=lambda p: p.score) if candidates else None

def _best_by_rank(self, kind) -> Optional[Prediction]:
candidates = [p for p in self.preds
if p.kind == kind and p.allele
and p.percentile_rank is not None]
return min(candidates, key=lambda p: p.percentile_rank) if candidates else None
# --- best_by (public, direction-aware) ---

def best_by(self, kind, field) -> Optional[Prediction]:
"""Return the prediction of ``kind`` with the best ``field``, or None.

"Best" direction comes from :func:`best_direction` —
``score`` is max-better, ``percentile_rank`` is min-better, and
``value`` is kind-dependent (IC50 lower-better, half-life higher-better).

Predictions with ``None`` for ``field`` are skipped. Predictions
with an allele are preferred; if none of the matching kind have an
allele, falls back to allele-less predictions (e.g. processing
predictors that emit allele-independent scores).
"""
direction = best_direction(kind, field)
op = max if direction == "max" else min

def has_value(p):
return getattr(p, field) is not None

with_allele = [p for p in self.preds
if p.kind == kind and p.allele and has_value(p)]
if with_allele:
return op(with_allele, key=lambda p: getattr(p, field))
without_allele = [p for p in self.preds
if p.kind == kind and has_value(p)]
if without_allele:
return op(without_allele, key=lambda p: getattr(p, field))
return None

def best_by_score(self, kind) -> Optional[Prediction]:
"""Best prediction of ``kind`` by ``score`` (max-better)."""
return self.best_by(kind, "score")

def best_by_rank(self, kind) -> Optional[Prediction]:
"""Best prediction of ``kind`` by ``percentile_rank`` (min-better)."""
return self.best_by(kind, "percentile_rank")

def best_by_value(self, kind) -> Optional[Prediction]:
"""Best prediction of ``kind`` by ``value``. Direction is kind-specific
(see :data:`VALUE_BEST_DIRECTIONS`). Raises ``ValueError`` for kinds
without a registered ``value`` direction."""
return self.best_by(kind, "value")


def preds_from_rows(rows, **shared):
Expand Down
151 changes: 151 additions & 0 deletions tests/test_pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from mhctools.pred import (
COLUMNS,
FIELD_BEST_DIRECTIONS,
Kind,
MHC_CLASS_VALUES,
MHC_DEPENDENCE_VALUES,
PeptideResult,
Prediction,
VALUE_BEST_DIRECTIONS,
best_direction,
preds_from_rows,
)
from mhctools.sample import MultiSample
Expand Down Expand Up @@ -572,3 +577,149 @@ def test_collection_to_peptide_preds():
assert len(pp_list) == 2 # two distinct peptide positions
sizes = sorted(len(pp.preds) for pp in pp_list)
assert sizes == [1, 2]


# -- best_direction --


def test_field_best_directions_constants():
"""score is max-better, percentile_rank is min-better — uniform across kinds."""
assert FIELD_BEST_DIRECTIONS["score"] == "max"
assert FIELD_BEST_DIRECTIONS["percentile_rank"] == "min"


def test_value_best_directions_kind_specific():
"""value direction is kind-dependent; affinity uses IC50 (min-better)."""
assert VALUE_BEST_DIRECTIONS[Kind.pMHC_affinity] == "min"
assert VALUE_BEST_DIRECTIONS[Kind.pMHC_stability] == "max"


def test_best_direction_field_defaults():
# score is max for any kind that uses it
assert best_direction(Kind.pMHC_affinity, "score") == "max"
assert best_direction(Kind.pMHC_presentation, "score") == "max"
assert best_direction(Kind.proteasome_cleavage, "score") == "max"
# percentile_rank is min for any kind that uses it
assert best_direction(Kind.pMHC_affinity, "percentile_rank") == "min"
assert best_direction(Kind.pMHC_presentation, "percentile_rank") == "min"


def test_best_direction_value_kind_specific():
assert best_direction(Kind.pMHC_affinity, "value") == "min" # IC50
assert best_direction(Kind.pMHC_stability, "value") == "max" # half-life


def test_best_direction_value_unregistered_raises():
"""`value` for a kind without a registered direction must raise —
we refuse to guess the unit semantics."""
with pytest.raises(ValueError, match="best_direction undefined"):
best_direction(Kind.pMHC_presentation, "value")
with pytest.raises(ValueError, match="best_direction undefined"):
best_direction(Kind.proteasome_cleavage, "value")


def test_best_direction_unknown_field_raises():
with pytest.raises(ValueError, match="best_direction undefined for field"):
best_direction(Kind.pMHC_affinity, "made_up_field")


def test_best_direction_accepts_string_kind():
# Kind constants are plain strings, so callers can pass either form.
assert best_direction("pMHC_affinity", "value") == "min"
assert best_direction("pMHC_affinity", "score") == "max"


# -- best_by (public, direction-aware) --


def test_best_by_score_picks_max():
ps = _make_pred_set()
best = ps.best_by_score(Kind.pMHC_affinity)
assert best.allele == "HLA-A*02:01"
assert best.score == 0.85


def test_best_by_rank_picks_min():
ps = _make_pred_set()
best = ps.best_by_rank(Kind.pMHC_affinity)
assert best.allele == "HLA-A*02:01"
assert best.percentile_rank == 0.8


def test_best_by_value_affinity_picks_min_ic50():
ps = _make_pred_set()
best = ps.best_by_value(Kind.pMHC_affinity)
assert best.allele == "HLA-A*02:01"
assert best.value == 120.5


def test_best_by_value_stability_picks_max_half_life():
ps = preds_from_rows(
[
dict(kind=Kind.pMHC_stability, allele="HLA-A*02:01",
score=0.6, value=4.0),
dict(kind=Kind.pMHC_stability, allele="HLA-B*07:02",
score=0.9, value=12.0),
],
peptide="SIINFEKL",
)
best = ps.best_by_value(Kind.pMHC_stability)
assert best.allele == "HLA-B*07:02"
assert best.value == 12.0


def test_best_by_value_unregistered_kind_raises():
ps = _make_pred_set()
with pytest.raises(ValueError, match="best_direction undefined"):
ps.best_by_value(Kind.pMHC_presentation)


def test_best_by_skips_none_field():
# antigen_processing pred has score but None for percentile_rank
ps = _make_pred_set()
assert ps.best_by_rank(Kind.antigen_processing) is None
assert ps.best_by_score(Kind.antigen_processing) is not None


def test_best_by_falls_back_to_allele_less():
ps = preds_from_rows(
[dict(kind=Kind.proteasome_cleavage, score=0.7),
dict(kind=Kind.proteasome_cleavage, score=0.9)],
peptide="SIINFEKL",
)
best = ps.best_by_score(Kind.proteasome_cleavage)
assert best is not None
assert best.score == 0.9
assert best.allele == ""


def test_best_by_missing_kind_returns_none():
ps = _make_pred_set()
assert ps.best_by_score(Kind.immunogenicity) is None
assert ps.best_by_rank(Kind.pMHC_stability) is None


def test_best_by_accepts_string_kind():
# Kind constants are plain strings, so callers can pass either form.
ps = _make_pred_set()
by_kind = ps.best_by_score(Kind.pMHC_affinity)
by_str = ps.best_by_score("pMHC_affinity")
assert by_kind == by_str
assert ps.best_by("pMHC_affinity", "value") == ps.best_by_value(Kind.pMHC_affinity)


def test_best_by_rank_falls_back_to_allele_less():
# Behavior change vs the old _best_by_rank: rank predictions without an
# allele are now considered when no allele-bearing rank pred exists. Pin
# the new contract so it can't silently regress.
ps = preds_from_rows(
[
dict(kind=Kind.pMHC_presentation, score=0.5, percentile_rank=10.0),
dict(kind=Kind.pMHC_presentation, score=0.8, percentile_rank=2.0),
],
peptide="SIINFEKL",
)
best = ps.best_presentation_by_rank
assert best is not None
assert best.allele == ""
assert best.percentile_rank == 2.0
Loading