Skip to content

Commit b975e5f

Browse files
author
dohyun-s
committed
Merge remote-tracking branch 'sergey/beta' into beta
2 parents 4cdb187 + 0b33d85 commit b975e5f

12 files changed

Lines changed: 1051 additions & 859 deletions

File tree

AlphaFold2.ipynb

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,14 @@
4646
"source": [
4747
"<img src=\"https://raw.githubusercontent.com/sokrypton/ColabFold/main/.github/ColabFold_Marv_Logo_Small.png\" height=\"200\" align=\"right\" style=\"height:240px\">\n",
4848
"\n",
49-
"# ColabFold - v1.6.0-beta\n",
50-
"**<font color=\"red\">WARNING</font> this \"beta\" notebook is in active development and may be break without warning.** Use the [\"main\"](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) notebook for most stable version.\n",
49+
"# ColabFold - v1.6.0 - beta\n",
50+
"**<font color=\"red\">WARNING</font> this \"beta\" notebook is in active development and may break without warning.** \n",
51+
"Use the [\"main\"](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) notebook for the most stable version.\n",
5152
"\n",
5253
"---\n",
5354
"\n",
5455
"Easy to use protein structure and complex prediction using [AlphaFold2](https://www.nature.com/articles/s41586-021-03819-2) and [Alphafold2-multimer](https://www.biorxiv.org/content/10.1101/2021.10.04.463034v1). Sequence alignments/templates are generated through [MMseqs2](mmseqs.com) and [HHsearch](https://github.com/soedinglab/hh-suite). For more details, see <a href=\"#Instructions\">bottom</a> of the notebook, checkout the [ColabFold GitHub](https://github.com/sokrypton/ColabFold) and read our manuscript. \n",
55-
"Old version: [v1.4](https://colab.research.google.com/github/sokrypton/ColabFold/blob/v1.4.0/AlphaFold2.ipynb)\n",
56+
"Old versions: [v1.4](https://colab.research.google.com/github/sokrypton/ColabFold/blob/v1.4.0/AlphaFold2.ipynb), [v1.5](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb)\n",
5657
"\n",
5758
"[Mirdita M, Schütze K, Moriwaki Y, Heo L, Ovchinnikov S, Steinegger M. ColabFold: Making protein folding accessible to all.\n",
5859
"*Nature Methods*, 2022](https://www.nature.com/articles/s41592-022-01488-1) \n",
@@ -226,8 +227,6 @@
226227
" # install dependencies\n",
227228
" # We have to use \"--no-warn-conflicts\" because colab already has a lot preinstalled with requirements different to ours\n",
228229
" pip install -q --no-warn-conflicts \"colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold@beta\"\n",
229-
" # high risk high gain\n",
230-
" pip install -q \"jax[cuda11_cudnn805]>=0.3.8,<0.4\" -f https://storage.googleapis.com/jax-releases/jax_releases.html\n",
231230
"\n",
232231
" # for debugging\n",
233232
" ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold\n",
@@ -266,7 +265,7 @@
266265
"source": [
267266
"#@title Run Prediction\n",
268267
"#@markdown ### Advanced settings\n",
269-
"model_type = \"auto\" #@param [\"auto\", \"alphafold2_ptm\", \"alphafold2_multimer_v1\", \"alphafold2_multimer_v2\", \"alphafold2_multimer_v3\"]\n",
268+
"model_type = \"auto\" #@param [\"auto\", \"alphafold2\", \"alphafold2_ptm\", \"alphafold2_multimer_v1\", \"alphafold2_multimer_v2\", \"alphafold2_multimer_v3\"]\n",
270269
"#@markdown - if `auto` will use `alphafold2_ptm` for monomer inputs and `alphafold2_multimer_v3` for multimer inputs.\n",
271270
"num_recycles = \"auto\" #@param [\"auto\", \"0\", \"1\", \"3\", \"6\", \"12\", \"24\", \"48\"]\n",
272271
"recycle_early_stop_tolerance = \"auto\" #@param [\"auto\", \"0.0\", \"0.5\", \"1.0\"]\n",
@@ -296,7 +295,7 @@
296295
"from pathlib import Path\n",
297296
"from colabfold.download import download_alphafold_params, default_data_dir\n",
298297
"from colabfold.utils import setup_logging\n",
299-
"from colabfold.run_alphafold import get_queries, run, set_model_type\n",
298+
"from colabfold.run_alphafold import run, set_model_type\n",
300299
"from colabfold.inputs import get_queries\n",
301300
"from colabfold.plot import plot_msa\n",
302301
"\n",
@@ -307,12 +306,12 @@
307306
"except:\n",
308307
" K80_chk = \"0\"\n",
309308
" pass\n",
309+
"\n",
310310
"if \"1\" in K80_chk:\n",
311311
" print(\"WARNING: found GPU Tesla K80: limited to total length < 1000\")\n",
312-
" if \"TF_FORCE_UNIFIED_MEMORY\" in os.environ:\n",
313-
" del os.environ[\"TF_FORCE_UNIFIED_MEMORY\"]\n",
314-
" if \"XLA_PYTHON_CLIENT_MEM_FRACTION\" in os.environ:\n",
315-
" del os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"]\n",
312+
"else:\n",
313+
" os.environ[\"TF_FORCE_UNIFIED_MEMORY\"] = \"1\"\n",
314+
" os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"4.0\"\n",
316315
"\n",
317316
"from colabfold.plot import plot_protein\n",
318317
"from pathlib import Path\n",
@@ -480,33 +479,40 @@
480479
" img = f.read()\n",
481480
" return prefix + base64.b64encode(img).decode('utf-8')\n",
482481
"\n",
483-
"pae = image_to_data_url(os.path.join(jobname,f\"{jobname}{jobname_prefix}_pae.png\"))\n",
482+
"pae_png_path = os.path.join(jobname,f\"{jobname}{jobname_prefix}_pae.png\")\n",
483+
"pae = image_to_data_url(pae_png_path) if os.path.exists(pae_png_path) else None\n",
484484
"cov = image_to_data_url(os.path.join(jobname,f\"{jobname}{jobname_prefix}_coverage.png\"))\n",
485485
"plddt = image_to_data_url(os.path.join(jobname,f\"{jobname}{jobname_prefix}_plddt.png\"))\n",
486-
"display(HTML(f\"\"\"\n",
487-
"<style>\n",
488-
" img {{\n",
489-
" float:left;\n",
490-
" }}\n",
491-
" .full {{\n",
492-
" max-width:100%;\n",
493-
" }}\n",
494-
" .half {{\n",
495-
" max-width:50%;\n",
496-
" }}\n",
497-
" @media (max-width:640px) {{\n",
498-
" .half {{\n",
499-
" max-width:100%;\n",
500-
" }}\n",
501-
" }}\n",
502-
"</style>\n",
503-
"<div style=\"max-width:90%; padding:2em;\">\n",
504-
" <h1>Plots for {escape(jobname)}</h1>\n",
505-
" <img src=\"{pae}\" class=\"full\" />\n",
506-
" <img src=\"{cov}\" class=\"half\" />\n",
507-
" <img src=\"{plddt}\" class=\"half\" />\n",
508-
"</div>\n",
509-
"\"\"\"))\n"
486+
"\n",
487+
"if pae is None:\n",
488+
" display(HTML(f\"\"\"\n",
489+
" <style>\n",
490+
" img {{float:left;}}\n",
491+
" .full {{max-width:100%;}}\n",
492+
" .half {{max-width:50%;}}\n",
493+
" @media (max-width:640px) {{.half {{max-width:100%;}}}}\n",
494+
" </style>\n",
495+
" <div style=\"max-width:90%; padding:2em;\">\n",
496+
" <h1>Plots for {escape(jobname)}</h1>\n",
497+
" <img src=\"{cov}\" class=\"half\" />\n",
498+
" <img src=\"{plddt}\" class=\"half\" />\n",
499+
" </div>\n",
500+
" \"\"\"))\n",
501+
"else:\n",
502+
" display(HTML(f\"\"\"\n",
503+
" <style>\n",
504+
" img {{float:left;}}\n",
505+
" .full {{max-width:100%;}}\n",
506+
" .half {{max-width:50%;}}\n",
507+
" @media (max-width:640px) {{.half {{max-width:100%;}}}}\n",
508+
" </style>\n",
509+
" <div style=\"max-width:90%; padding:2em;\">\n",
510+
" <h1>Plots for {escape(jobname)}</h1>\n",
511+
" <img src=\"{pae}\" class=\"full\" />\n",
512+
" <img src=\"{cov}\" class=\"half\" />\n",
513+
" <img src=\"{plddt}\" class=\"half\" />\n",
514+
" </div>\n",
515+
" \"\"\"))"
510516
],
511517
"execution_count": null,
512518
"outputs": []

README.md

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Link to ["main"](https://github.com/sokrypton/ColabFold) branch.
3636
- Yes, but be **CAREFUL**, the bfactor column is populated with pLDDT confidence values (higher = better). Phenix.phaser expects a "real" bfactor, where (lower = better). See [post](https://twitter.com/cheshireminima/status/1423929241675120643) from Claudia Millán.
3737
- What is the maximum length?
3838
- Limits depends on free GPU provided by Google-Colab `fingers-crossed`
39-
- For GPU: `Tesla T4` or `Tesla P100` with ~16G the max length is ~1400
39+
- For GPU: `Tesla T4` or `Tesla P100` with ~16G the max length is ~2000
4040
- For GPU: `Tesla K80` with ~12G the max length is ~1000
4141
- To check what GPU you got, open a new code cell and type `!nvidia-smi`
4242
- Is it okay to use the MMseqs2 MSA server (`cf.run_mmseqs2`) on a local computer?
@@ -62,34 +62,8 @@ Link to ["main"](https://github.com/sokrypton/ColabFold) branch.
6262
- Is there a way to amber-relax structures without having to rerun alphafold/colabfold from scratch?
6363
- Yes, see this [notebook](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/beta/relax_amber.ipynb).
6464

65-
6665
### Running locally
67-
68-
_Note: Checkout [localcolabfold](https://github.com/YoshitakaMo/localcolabfold) too
69-
70-
It is recommended that you create a conda environment with python version 3.7. If you use a newer python version, you might run into problems when installing tensorflow, as the required version may not be found.
71-
72-
```shell
73-
conda create --name my_colabfold python=3.7
74-
```
75-
Then activate it with `activate my_colabfold`.
76-
77-
Install ColabFold using the `pip` commands below. `pip` will resolve and install all required dependencies and ColabFold should be ready within a few minutes to use. Please check the [JAX documentation](https://github.com/google/jax#pip-installation-gpu-cuda) for how to get JAX to work on your GPU or TPU.
78-
79-
```shell
80-
pip install "colabfold[alphafold] @ git+https://github.com/sokrypton/ColabFold"
81-
pip install -q "jax[cuda]>=0.3.8,<0.4" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
82-
# For template-based predictions also install kalign and hhsuite
83-
conda install -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0
84-
# For amber also install openmm and pdbfixer
85-
conda install -c conda-forge openmm=7.5.1 pdbfixer
86-
```
87-
88-
```shell
89-
colabfold_batch <directory_with_fasta_files> <result_dir>
90-
```
91-
92-
If no GPU or TPU is present, `colabfold_batch` can be executed (slowly) using only a CPU with the `--cpu` parameter.
66+
For instructions on how to install ColabFold locally see: [localcolabfold](https://github.com/YoshitakaMo/localcolabfold)
9367

9468
### Generating MSAs for large scale structure/complex predictions
9569

@@ -153,15 +127,15 @@ Searches against the ColabFoldDB can be done in two different modes:
153127
-----------------
154128
**OLD Updates**
155129
```diff
156-
2023/01/03: The MSA server's faulty hardware from 12/26 was replaced.
157-
There were intermittent failures on 12/26 and 1/3. Currently,
158-
there are no known issues. Let us know if you experience any.
159-
2022/10/10: Bugfix: random_seed was not being used for alphafold-multimer.
160-
Same structure was returned regardless of defined seed. This
161-
has been fixed!
162-
2022/07/13: We have set up a new ColabFold MSA server provided by Korean
163-
Bioinformation Center. It provides accelerated MSA generation,
164-
we updated the UniRef30 to 2022_02 and PDB/PDB70 to 220313.
130+
03Jan2023: The MSA server's faulty hardware from 12/26 was replaced.
131+
There were intermittent failures on 12/26 and 1/3. Currently,
132+
there are no known issues. Let us know if you experience any.
133+
10Oct2022: Bugfix: random_seed was not being used for alphafold-multimer.
134+
Same structure was returned regardless of defined seed. This
135+
has been fixed!
136+
13Jul2022: We have set up a new ColabFold MSA server provided by Korean
137+
Bioinformation Center. It provides accelerated MSA generation,
138+
we updated the UniRef30 to 2022_02 and PDB/PDB70 to 220313.
165139
11Mar2022: We use in default AlphaFold-multimer-v2 weights for complex modeling.
166140
We also offer the old complex modes "AlphaFold-ptm" or "AlphaFold-multimer-v1"
167141
04Mar2022: ColabFold now uses a much more powerful server for MSAs and searches through the ColabFoldDB instead of BFD/MGnify.

colabfold/alphafold/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99

1010
def load_models_and_params(
11+
num_models: int,
1112
use_templates: bool,
1213
num_recycles: Optional[int] = None,
1314
recycle_early_stop_tolerance: Optional[float] = None,
@@ -22,7 +23,7 @@ def load_models_and_params(
2223
use_fuse: bool = True,
2324
use_bfloat16: bool = True,
2425
use_dropout: bool = False,
25-
26+
save_all: bool = False,
2627
) -> List[Tuple[str, model.RunModel, haiku.Params]]:
2728
"""We use only two actual models and swap the parameters to avoid recompiling.
2829
@@ -77,6 +78,12 @@ def load_models_and_params(
7778
else:
7879
model_config.data.common.max_extra_msa = max_extra_seq
7980

81+
# disable some outputs if not being saved
82+
if not save_all:
83+
model_config.model.heads.distogram.weight = 0.0
84+
model_config.model.heads.masked_msa.weight = 0.0
85+
model_config.model.heads.experimentally_resolved.weight = 0.0
86+
8087
# set number of recycles and ensembles
8188
if "multimer" in model_suffix:
8289
if num_recycles is not None:
@@ -117,6 +124,8 @@ def load_models_and_params(
117124
)
118125
# reorder model
119126
for n, model_number in enumerate(model_order):
127+
if n == num_models:
128+
break
120129
model_name = f"model_{model_number}"
121130
for m in model_runner_and_params_build_order:
122131
if model_name == m[0]:

colabfold/alphafold/relax.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#############
22
# relax functions
33
#############
4-
4+
from pathlib import Path
55
from alphafold.relax import relax
66
from alphafold.common import protein
77

0 commit comments

Comments
 (0)