Skip to content

Commit 7ab01a9

Browse files
authored
Add predictor MHC context metadata (#210)
1 parent efb63a2 commit 7ab01a9

17 files changed

Lines changed: 568 additions & 92 deletions

README.md

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,12 @@ df = ms.predict_dataframe(["SIINFEKL"])
152152
df = ms.predict_proteins_dataframe({"TP53": "MEEPQ..."})
153153
```
154154

155-
### Measurement kinds
155+
### Measurement kinds and MHC context
156156

157157
Each `Prediction` has a `kind` string describing what it measures; the kinds are listed in the table below.
158158

159+
The canonical prediction kind strings are defined in `mhctools.pred.Kind`.
160+
159161
| Kind | Meaning |
160162
|---|---|
161163
| `pMHC_affinity` | Peptide-MHC binding affinity |
@@ -167,6 +169,49 @@ Each `Prediction` has a `kind` string describing what it measures:
167169
| `tap_transport` | TAP transport score (reserved, not yet used) |
168170
| `erap_trimming` | ERAP trimming score (reserved, not yet used) |
169171

172+
Predictors also expose `kind_support()` so downstream code can tell what MHC
173+
context is meaningful for each emitted kind:
174+
175+
```python
176+
support = predictor.kind_support()
177+
support["pMHC_affinity"]
178+
# {"mhc_dependence": "single_allele", "mhc_class": "I"}
179+
```
180+
181+
`mhc_dependence` is one of:
182+
183+
| Value | Meaning |
184+
|---|---|
185+
| `none` | The prediction is MHC-independent; `Prediction.allele` is empty. |
186+
| `single_allele` | The prediction is for one peptide/MHC allele pair; `Prediction.allele` is part of the key. |
187+
| `haplotype` | The prediction uses the requested MHC repertoire jointly; `Prediction.allele` may carry best-allele attribution but is not the prediction key. |
188+
189+
`mhc_class` is one of `none`, `I`, `II`, or `both`.
190+
191+
The allowed metadata values are defined in `mhctools.pred` as
192+
`MHC_DEPENDENCE_VALUES` and `MHC_CLASS_VALUES`.
193+
194+
Examples:
195+
196+
| Predictor | Kind | `mhc_dependence` | `mhc_class` |
197+
|---|---|---|---|
198+
| `NetMHCpan41` | `pMHC_affinity` | `single_allele` | `I` |
199+
| `NetMHCpan41` | `pMHC_presentation` | `single_allele` | `I` |
200+
| `NetMHCIIpan4_EL` | `pMHC_presentation` | `single_allele` | `II` |
201+
| `NetMHCstabpan` | `pMHC_stability` | `single_allele` | `I` |
202+
| `MHCflurry` | `pMHC_affinity` | `single_allele` | `I` |
203+
| `MHCflurry` haplotype mode | `pMHC_presentation` | `haplotype` | `I` |
204+
| `MHCflurry` per-allele panel mode | `pMHC_presentation` | `single_allele` | `I` |
205+
| `Pepsickle` | `proteasome_cleavage` | `none` | `none` |
206+
207+
For MHCflurry presentation, `presentation_allele_mode="haplotype"` treats the
208+
requested alleles as one sample genotype and emits one `pMHC_presentation`
209+
record per peptide. The `allele` field carries MHCflurry's `best_allele`
210+
attribution when available. `presentation_allele_mode="per_allele"` treats each
211+
allele as a separate one-allele synthetic sample and emits one presentation
212+
record per peptide/allele pair. The default `"auto"` mode uses haplotype mode
213+
for up to six alleles and per-allele mode for larger allele panels.
214+
170215
### The Prediction object
171216

172217
Every prediction is a frozen, self-contained `Prediction` dataclass:

mhctools/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
from .binding_prediction import BindingPrediction
22
from .binding_prediction_collection import BindingPredictionCollection
3-
from .pred import Prediction, Pred, PeptideResult, PeptidePreds, Kind, preds_from_rows
3+
from .pred import (
4+
Kind,
5+
MHC_CLASS_VALUES,
6+
MHC_DEPENDENCE_VALUES,
7+
PeptidePreds,
8+
PeptideResult,
9+
Pred,
10+
Prediction,
11+
preds_from_rows,
12+
)
413
from .sample import MultiSample
514
from .iedb import (
615
IedbNetMHCcons,
@@ -63,14 +72,16 @@ def __getattr__(name):
6372
raise AttributeError(
6473
"module %r has no attribute %r" % (__name__, name))
6574

66-
__version__ = "3.13.6"
75+
__version__ = "3.13.7"
6776

6877
__all__ = [
6978
"Prediction",
7079
"Pred", # backward compat alias
7180
"PeptideResult",
7281
"PeptidePreds", # backward compat alias
7382
"Kind",
83+
"MHC_CLASS_VALUES",
84+
"MHC_DEPENDENCE_VALUES",
7485
"preds_from_rows",
7586
"MultiSample",
7687
"BindingPrediction",

mhctools/base_predictor.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919

2020
from .unsupported_allele import UnsupportedAllele
2121
from .binding_prediction_collection import BindingPredictionCollection
22-
from .pred import Prediction, Kind, PeptideResult
22+
from .pred import (
23+
Kind,
24+
PeptideResult,
25+
Prediction,
26+
)
2327

2428
logger = logging.getLogger(__name__)
2529

@@ -101,6 +105,7 @@ class BasePredictor(object):
101105
flank_length = 15
102106
n_flank_length = None
103107
c_flank_length = None
108+
mhc_class = "I"
104109

105110
def __init__(
106111
self,
@@ -331,6 +336,20 @@ def _default_pred_kind(self):
331336
"""Override in subclasses to set the Kind for compat conversion."""
332337
return Kind.pMHC_affinity
333338

339+
def kind_support(self):
340+
"""Predictor-specific MHC context for supported prediction kinds."""
341+
return {
342+
self._default_pred_kind(): {
343+
"mhc_dependence": "single_allele",
344+
"mhc_class": self.mhc_class,
345+
}
346+
}
347+
348+
@property
349+
def supported_kinds(self):
350+
"""Prediction kind strings this predictor can emit."""
351+
return tuple(self.kind_support())
352+
334353
# --- deprecated API (still works) ---
335354

336355
def predict_peptides(self, peptides):

mhctools/bigmhc.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@
3131
import pandas as pd
3232
import torch
3333

34-
from .pred import Kind, Prediction, PeptideResult, COLUMNS
34+
from .pred import (
35+
COLUMNS,
36+
Kind,
37+
PeptideResult,
38+
Prediction,
39+
)
3540

3641

3742
def _find_bigmhc_dir(bigmhc_path=None):
@@ -133,6 +138,18 @@ def _pred_kind(self):
133138
return Kind.immunogenicity
134139
return Kind.pMHC_presentation
135140

141+
def kind_support(self):
142+
return {
143+
self._pred_kind(): {
144+
"mhc_dependence": "single_allele",
145+
"mhc_class": "I",
146+
}
147+
}
148+
149+
@property
150+
def supported_kinds(self):
151+
return tuple(self.kind_support())
152+
136153
def _predictor_name(self):
137154
return "bigmhc_%s" % self.mode
138155

mhctools/iedb.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,8 @@ def __init__(
325325
IEDB_MHC_CLASS_II_URL = "http://tools-cluster-interface.iedb.org/tools_api/mhcii/"
326326

327327
class IedbNetMHCIIpan(IedbBasePredictor):
328+
mhc_class = "II"
329+
328330
def __init__(
329331
self,
330332
alleles,

0 commit comments

Comments
 (0)