Skip to content

Commit 1b2b345

Browse files
committed
Release v0.12.0
1 parent 7f22098 commit 1b2b345

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+4997
-743
lines changed

flake.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
devShells.default = pkgs.mkShell {
1919
packages = [ pkgs.bashInteractive ];
2020
shellHook = ''
21-
pixi i -e dev
22-
eval $(pixi shell-hook -e dev)
21+
uv sync --group dev
22+
source .venv/bin/activate
2323
'';
2424
};
2525
}

openprotein/embeddings/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from .esm import ESMModel
1111
from .poet import PoETModel
1212
from .poet2 import PoET2Model
13+
from .ablang import AbLang2Model
1314
from .schemas import (
1415
EmbeddedSequence,
1516
EmbeddingsJob,

openprotein/embeddings/ablang.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""AbLang model."""
2+
3+
from .models import EmbeddingModel
4+
5+
6+
class AbLang2Model(EmbeddingModel):
7+
"""
8+
Community AbLang2 model that targets antibodies.
9+
10+
Examples
11+
--------
12+
View specific model details (inc supported tokens) with the `?` operator.
13+
14+
.. code-block:: python
15+
16+
>>> import openprotein
17+
>>> session = openprotein.connect(username="user", password="password")
18+
>>> session.embedding.ablang2?
19+
"""
20+
21+
model_id = ["ablang2"]

openprotein/embeddings/api.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -598,9 +598,16 @@ def request_generate_post(
598598
body["seed"] = random_seed
599599
if kwargs.get("prompt_id"):
600600
body["prompt_id"] = kwargs["prompt_id"]
601-
if kwargs.get("query_id"):
601+
if kwargs.get("design_id"):
602+
body["design_id"] = kwargs["design_id"]
603+
query_id = kwargs.get("query_id")
604+
if query_id is not None:
602605
assert model_id != "poet", f"Model with id {model_id} does not support query"
603-
body["query_id"] = kwargs["query_id"]
606+
body["query_id"] = (
607+
list(query_id)
608+
if isinstance(query_id, list)
609+
else query_id
610+
)
604611
if "use_query_structure_in_decoder" in kwargs:
605612
body["use_query_structure_in_decoder"] = kwargs[
606613
"use_query_structure_in_decoder"

openprotein/embeddings/future.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,14 @@ class EmbeddingsGenerateFuture(BaseScoreFuture[Score]):
165165
def stream(self) -> Iterator[Score]:
166166
stream = api.request_get_generate_result(session=self.session, job_id=self.id)
167167
# name, sequence, ...
168-
next(stream) # ignore header
168+
header = next(stream)
169+
has_query_id = (
170+
len(header) > 2 and header[-1].strip().lower() == "query_id"
171+
)
169172
for line in stream:
170173
# combine scores into numpy array
171-
scores = np.array([float(s) for s in line[2:]])
174+
score_values = line[2:-1] if has_query_id else line[2:]
175+
scores = np.array([float(s) for s in score_values])
172176
output = Score(name=line[0], sequence=line[1], score=scores)
173177
yield output
174178

openprotein/embeddings/poet2.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .poet import PoETModel
2121

2222
if TYPE_CHECKING:
23+
from openprotein.models.structure_generation import StructureGenerationFuture
2324
from openprotein.predictor import PredictorModel
2425
from openprotein.svd import SVDModel
2526
from openprotein.umap import UMAPModel
@@ -290,7 +291,16 @@ def single_site(
290291
def generate(
291292
self,
292293
prompt: str | Prompt | None,
293-
query: str | bytes | Protein | Complex | Query | None = None,
294+
query: (
295+
str
296+
| bytes
297+
| Protein
298+
| Complex
299+
| Query
300+
| list[str | bytes | Protein | Complex | Query]
301+
| None
302+
) = None,
303+
design: "str | StructureGenerationFuture | None" = None,
294304
use_query_structure_in_decoder: bool = True,
295305
num_samples: int = 100,
296306
temperature: float = 1.0,
@@ -308,7 +318,7 @@ def generate(
308318
----------
309319
prompt : str or Prompt or None, optional
310320
Prompt from an align workflow to condition PoET model.
311-
query : str or bytes or Protein or Complex or Query or None, optional
321+
query : str or bytes or Protein or Complex or Query or list of these or None, optional
312322
Query to use with prompt.
313323
use_query_structure_in_decoder : bool, optional
314324
Whether to use query structure in decoder. Default is True.
@@ -340,9 +350,14 @@ def generate(
340350
EmbeddingsGenerateFuture
341351
A future object representing the status and information about the generation job.
342352
"""
353+
from openprotein.models.structure_generation import StructureGenerationFuture
354+
343355
prompt_api = getattr(self.session, "prompt", None)
344356
assert isinstance(prompt_api, PromptAPI)
345-
query_id = prompt_api._resolve_query(query=query)
357+
query_id = prompt_api._resolve_query(query=query) if query is not None else None
358+
design_id = (
359+
design.job_id if isinstance(design, StructureGenerationFuture) else design
360+
)
346361
if ensemble_weights is not None:
347362
# NB: for now, ensemble_method is None -> ensemble_method == "arithmetic"
348363
if ensemble_method is None or (ensemble_method == "arithmetic"):
@@ -364,6 +379,7 @@ def generate(
364379
max_length=max_length,
365380
seed=seed,
366381
query_id=query_id,
382+
design_id=design_id,
367383
use_query_structure_in_decoder=use_query_structure_in_decoder,
368384
ensemble_weights=ensemble_weights,
369385
ensemble_method=ensemble_method,

openprotein/fold/__init__.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
11
"""
22
Fold module for predicting structures on OpenProtein.
3-
4-
isort:skip_file
53
"""
64

7-
from .schemas import FoldJob, FoldMetadata
8-
from .models import FoldModel
9-
from .esmfold import ESMFoldModel
10-
from .minifold import MiniFoldModel
115
from .alphafold2 import AlphaFold2Model
126
from .boltz import (
137
Boltz1Model,
@@ -18,6 +12,31 @@
1812
BoltzConstraint,
1913
BoltzProperty,
2014
)
21-
from .rosettafold3 import RosettaFold3Model
22-
from .future import FoldResultFuture
15+
from .esmfold import ESMFoldModel
2316
from .fold import FoldAPI
17+
from .future import FoldResultFuture
18+
from .minifold import MiniFoldModel
19+
from .models import FoldModel
20+
from .protenix import ProtenixModel
21+
from .rosettafold3 import RosettaFold3Model
22+
from .schemas import FoldJob, FoldMetadata
23+
24+
__all__ = [
25+
"FoldJob",
26+
"FoldMetadata",
27+
"FoldModel",
28+
"ESMFoldModel",
29+
"MiniFoldModel",
30+
"AlphaFold2Model",
31+
"ProtenixModel",
32+
"Boltz1Model",
33+
"Boltz1xModel",
34+
"Boltz2Model",
35+
"BoltzAffinity",
36+
"BoltzConfidence",
37+
"BoltzConstraint",
38+
"BoltzProperty",
39+
"RosettaFold3Model",
40+
"FoldResultFuture",
41+
"FoldAPI",
42+
]

openprotein/fold/alphafold2.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
"""Community-based AlphaFold 2 model running using ColabFold."""
22

3-
import io
43
import warnings
5-
from typing import Any, Sequence
4+
from typing import Sequence
65

7-
from openprotein.align import AlignAPI, MSAFuture
6+
from openprotein.align import MSAFuture
87
from openprotein.base import APISession
98
from openprotein.common import ModelMetadata
10-
from openprotein.fold.common import normalize_inputs, serialize_input
11-
from openprotein.fold.complex import id_generator
12-
from openprotein.molecules import Protein, DNA, RNA, Ligand, Complex
9+
from openprotein.fold.common import (
10+
msa_future_to_complex,
11+
normalize_inputs,
12+
serialize_input,
13+
)
14+
from openprotein.molecules import DNA, RNA, Complex, Ligand, Protein
1315

1416
from . import api
1517
from .future import FoldResultFuture
@@ -33,7 +35,7 @@ def __init__(
3335

3436
def fold(
3537
self,
36-
sequences: Sequence[Complex | Protein | str] | MSAFuture | None = None,
38+
sequences: Sequence[Complex | Protein | str | bytes] | MSAFuture,
3739
num_recycles: int | None = None,
3840
num_models: int = 1,
3941
num_relax: int = 0,
@@ -44,7 +46,7 @@ def fold(
4446
4547
Parameters
4648
----------
47-
sequences : List[Complex | Protein | str] | MSAFuture
49+
sequences : Sequence[Complex | Protein | str | bytes] | MSAFuture
4850
List of protein sequences to include in folded output. `Protein` objects must be tagged with an `msa`, which can be a `Protein.single_sequence_mode` for single sequence mode. Alternatively, supply an `MSAFuture` to use all query sequences as a multimer.
4951
num_recycles : int
5052
number of times to recycle models
@@ -57,7 +59,6 @@ def fold(
5759
-------
5860
job : Job
5961
"""
60-
from openprotein.align import AlignAPI
6162

6263
if "msa" in kwargs:
6364
warnings.warn(
@@ -71,18 +72,7 @@ def fold(
7172

7273
# build the normalized_models from msa
7374
if isinstance(sequences, MSAFuture):
74-
id_gen = id_generator()
75-
align_api = getattr(self.session, "align", None)
76-
assert isinstance(align_api, AlignAPI)
77-
msa = sequences # rename
78-
seed = align_api.get_seed(job_id=msa.job.job_id)
79-
_proteins: dict[str, Protein] = {}
80-
for seq in seed.split(":"):
81-
protein = Protein(sequence=seq)
82-
id = next(id_gen)
83-
protein.msa = msa.id
84-
_proteins[id] = protein
85-
normalized_complexes = [Complex(chains=_proteins)]
75+
normalized_complexes = [msa_future_to_complex(self.session, sequences)]
8676

8777
else:
8878
normalized_complexes = normalize_inputs(sequences)

openprotein/fold/api.py

Lines changed: 10 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -199,93 +199,21 @@ def fold_get_extra_result(
199199
The result as a numpy array (for "pae", "pde", "plddt") or a list of dictionaries (for "confidence", "affinity").
200200
"""
201201
if key in {"pae", "pde", "plddt", "ptm"}:
202-
formatter = lambda response: np.load(io.BytesIO(response.content))
203-
elif key in {"confidence", "affinity"}:
204-
formatter = lambda response: response.json()
205-
elif key in {"score", "metrics"}:
206-
import pandas as pd
207-
208-
formatter = lambda response: pd.read_csv(io.StringIO(response.content.decode()))
209-
else:
210-
raise ValueError(f"Unexpected key: {key}")
211-
endpoint = PATH_PREFIX + f"/{job_id}/{sequence_or_index}/{key}"
212-
try:
213-
response = session.get(
214-
endpoint,
215-
)
216-
except HTTPError as e:
217-
if e.status_code == 400 and key == "affinity":
218-
raise ValueError("affinity not found for request") from None
219-
raise e
220-
output = formatter(response)
221-
return output
222-
223-
224-
def fold_get_complex_result(
225-
session: APISession, job_id: str, format: Literal["pdb", "mmcif"]
226-
) -> bytes:
227-
"""
228-
Get encoded result for a complex from the request ID.
229202

230-
Parameters
231-
----------
232-
session : APISession
233-
Session object for API communication.
234-
job_id : str
235-
Job ID to retrieve results from.
236-
format : {'pdb', 'mmcif'}
237-
Format of the result.
238-
239-
Returns
240-
-------
241-
bytes
242-
Encoded result for the complex.
243-
"""
244-
endpoint = PATH_PREFIX + f"/{job_id}/complex"
245-
response = session.get(
246-
endpoint,
247-
params={
248-
"format": format,
249-
},
250-
)
251-
return response.content
252-
253-
254-
def fold_get_complex_extra_result(
255-
session: APISession,
256-
job_id: str,
257-
key: Literal[
258-
"pae", "pde", "plddt", "ptm", "confidence", "affinity", "score", "metrics"
259-
],
260-
) -> "np.ndarray | list[dict] | pd.DataFrame":
261-
"""
262-
Get extra result for a complex from the request ID.
263-
264-
Parameters
265-
----------
266-
session : APISession
267-
Session object for API communication.
268-
job_id : str
269-
Job ID to retrieve results from.
270-
key : {'pae', 'pde', 'plddt', 'ptm', 'confidence', 'affinity', 'score', 'metrics'}
271-
The type of result to retrieve.
272-
273-
Returns
274-
-------
275-
numpy.ndarray or list of dict
276-
The result as a numpy array (for "pae", "pde", "plddt") or a list of dictionaries (for "confidence", "affinity").
277-
"""
278-
if key in {"pae", "pde", "plddt", "ptm"}:
279-
formatter = lambda response: np.load(io.BytesIO(response.content))
203+
def formatter(response):
204+
return np.load(io.BytesIO(response.content))
280205
elif key in {"confidence", "affinity"}:
281-
formatter = lambda response: response.json()
206+
207+
def formatter(response):
208+
return response.json()
282209
elif key in {"score", "metrics"}:
283210
import pandas as pd
284211

285-
formatter = lambda response: pd.read_csv(io.StringIO(response.content.decode()))
212+
def formatter(response):
213+
return pd.read_csv(io.StringIO(response.content.decode()))
286214
else:
287215
raise ValueError(f"Unexpected key: {key}")
288-
endpoint = PATH_PREFIX + f"/{job_id}/complex/{key}"
216+
endpoint = PATH_PREFIX + f"/{job_id}/{sequence_or_index}/{key}"
289217
try:
290218
response = session.get(
291219
endpoint,
@@ -321,28 +249,8 @@ def fold_models_post(
321249
The outer list represents the batch of requests, and the inner
322250
list represents the complex, with each item in the list being
323251
an entity in that complex. A monomer would thus be a single item.
324-
num_recycles : int, optional
325-
Number of recycles for structure prediction.
326-
num_models : int, optional
327-
Number of models to generate.
328-
num_relax : int, optional
329-
Number of relaxation steps.
330-
use_potentials : bool, optional
331-
Whether to use potentials.
332-
diffusion_samples : int, optional
333-
Number of diffusion samples (boltz).
334-
recycling_steps : int, optional
335-
Number of recycling steps (boltz).
336-
sampling_steps : int, optional
337-
Number of sampling steps (boltz).
338-
step_scale : float, optional
339-
Step scale (boltz).
340-
constraints : dict, optional
341-
Constraints to apply.
342-
templates : list, optional
343-
Templates to use.
344-
properties : dict, optional
345-
Additional properties.
252+
**kwargs
253+
Additional keyword arguments to be sent with POST body.
346254
347255
Returns
348256
-------

0 commit comments

Comments
 (0)