|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +# This is a benchmark file to reproduce experiments |
| 4 | + |
| 5 | +source /home/domain/data/prog/miniconda3/etc/profile.d/conda.sh |
| 6 | +RFDIFFUSION_PATH=../ |
| 7 | + |
| 8 | +conda activate rfdiffusion |
| 9 | + |
| 10 | +# define rfdiffusion parameters |
| 11 | + |
| 12 | +declare -A pot_dict |
| 13 | +pot_dict[no_potential]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1"] potentials.guide_scale=0' |
| 14 | +pot_dict[na_contacts]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1"] potentials.guide_scale=1' |
| 15 | + |
| 16 | +pot_dict[na_contacts_02avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.2"] potentials.guide_scale=1' |
| 17 | +pot_dict[na_contacts_04avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.4"] potentials.guide_scale=1' |
| 18 | +pot_dict[na_contacts_06avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.6"] potentials.guide_scale=1' |
| 19 | +pot_dict[na_contacts_08avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.8"] potentials.guide_scale=1' |
| 20 | +pot_dict[na_contacts_avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:1"] potentials.guide_scale=1' |
| 21 | + |
| 22 | +pot_dict[na_contacts_px0]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,predicted:1"] potentials.guide_scale=1' |
| 23 | + |
| 24 | +declare -A motif_dict |
| 25 | +motif_dict[de_novo]="contigmap.contigs=[123-183]" |
| 26 | +motif_dict[site]="contigmap.contigs=[40-60/A21-22/40-60/B222-222/40-60] inference.ckpt_override_path=$RFDIFFUSION_PATH/models/ActiveSite_ckpt.pt" |
| 27 | +motif_dict[binding]="contigmap.contigs=[18-18/A24-24/18-18/A43-43/27-27/A71-71/3-3/A75-75/5-5/A81-81/85-85] inference.ckpt_override_path=$RFDIFFUSION_PATH/models/ActiveSite_ckpt.pt" |
| 28 | + |
| 29 | +mkdir example_outputs/na/ |
| 30 | +mkdir example_outputs/na/rfdiffusion_outputs |
| 31 | + |
| 32 | +# run rfdiffusion |
| 33 | +for motif in de_novo site binding; |
| 34 | +do |
| 35 | + mkdir example_outputs/na/rfdiffusion_outputs/$motif |
| 36 | + for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0; |
| 37 | + do |
| 38 | + echo imsoi $motif $pot >> logs/test_na.log |
| 39 | + mkdir example_outputs/na/rfdiffusion_outputs/$motif/$pot |
| 40 | + $RFDIFFUSION_PATH/scripts/run_inference.py inference.num_designs=50 inference.deterministic=true \ |
| 41 | + inference.output_prefix=example_outputs/na/rfdiffusion_outputs/$motif/$pot/imsoi \ |
| 42 | + "inference.input_pdb=input_pdbs/1M5X.pdb" \ |
| 43 | + ${motif_dict[$motif]} ${pot_dict[$pot]} >> example_outputs/na/rfdiffusion_outputs/test_na.log |
| 44 | + done |
| 45 | +done |
| 46 | + |
| 47 | +conda deactivate |
| 48 | + |
| 49 | +# add NA structures to designs |
| 50 | + |
| 51 | +for motif in de_novo site binding; |
| 52 | +do |
| 53 | + for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0; |
| 54 | + do |
| 55 | + for lig in `ls example_outputs/na/rfdiffusion_outputs/$motif/$pot| grep na` |
| 56 | + do |
| 57 | + cat example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//') | egrep -v 'DA|DG|DC|DT' > tmp |
| 58 | + cat tmp > example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//') |
| 59 | + numatoms=$(tail -1 example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//') | head -c 11 | tail -c 4 | xargs) |
| 60 | + cat example_outputs/na/rfdiffusion_outputs/$motif/$pot/$lig | \ |
| 61 | + awk -v var=$numatoms 'BEGIN { FIELDWIDTHS="6 5 2 1 54"}{printf "%s%5u%s%s%s%s %s\n", $1,$2 + var,$3,$4,$5,$6,$4}' \ |
| 62 | + >> example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//') |
| 63 | + done |
| 64 | + done |
| 65 | +done |
| 66 | + |
| 67 | + |
| 68 | +# running LigandMPNN |
| 69 | +LIGANDMPNN_PATH=../../LigandMPNN/ |
| 70 | +WD=`pwd` |
| 71 | + |
| 72 | +conda activate ligandmpnn_env |
| 73 | + |
| 74 | +mkdir example_outputs/na/proteinmpnn_output |
| 75 | + |
| 76 | +for motif in de_novo site binding; |
| 77 | +do |
| 78 | + mkdir example_outputs/na/proteinmpnn_output/$motif |
| 79 | + |
| 80 | + for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0; |
| 81 | + do |
| 82 | + # generate json files for ligandmpnn multiple run |
| 83 | + cd $WD |
| 84 | + pdb_ids=$WD/example_outputs/na/proteinmpnn_output/$motif/${pot}_ids.json |
| 85 | + redesigned_residues=$WD/example_outputs/na/proteinmpnn_output/$motif/${pot}_res.json |
| 86 | + echo { > ${pdb_ids} |
| 87 | + echo { > ${redesigned_residues} |
| 88 | + |
| 89 | + itdir=$WD/example_outputs/na/proteinmpnn_output/$motif/$pot |
| 90 | + for pdb in `ls $itdir | grep pdb | grep -v na `; |
| 91 | + do |
| 92 | + echo \"$itdir/$pdb\": \"\", >> ${pdb_ids} |
| 93 | + echo \"$itdir/$pdb\": \"$(cat $itdir/$pdb | grep 'CA GLY A' | grep '0.00$' | awk 'BEGIN{OFS="";ORS=" "}{print $5, $6}' | sed 's/\ $//' )\", >> ${redesigned_residues} |
| 94 | + done |
| 95 | + |
| 96 | + tmp=`cat ${pdb_ids} | sed '$ s/\,$/\n}/'` |
| 97 | + echo $tmp > ${pdb_ids} |
| 98 | + tmp=`cat ${redesigned_residues} | sed '$ s/\,$/\n}/'` |
| 99 | + echo $tmp > ${redesigned_residues} |
| 100 | + |
| 101 | + # run ligandmpnn |
| 102 | + cd $LIGANDMPNN_PATH |
| 103 | + python3 run.py \ |
| 104 | + --pdb_path_multi ${pdb_ids} \ |
| 105 | + --out_folder $WD/example_outputs/na/proteinmpnn_output/$motif/$pot \ |
| 106 | + --model_type "ligand_mpnn" \ |
| 107 | + --batch_size 5 --pack_side_chains 1 \ |
| 108 | + --number_of_packs_per_design 1 \ |
| 109 | + --pack_with_ligand_context 1 |
| 110 | + --redesigned_residues_multi ${redesigned_residues} |
| 111 | + |
| 112 | + # write ligandmpnn output sequences to a single file |
| 113 | + cd $WD |
| 114 | + for pdb in `ls example_outputs/na/proteinmpnn_output/$motif/$pot/seqs | grep fa` ; |
| 115 | + do |
| 116 | + echo $pdb |
| 117 | + cat example_outputs/na/proteinmpnn_output/$motif/$pot/seqs/${pdb} | awk '{print $1, $2}' | grep id= -A 1 | sed 's/,\ id=/_/' | sed 's/,//' >> example_outputs/na/proteinmpnn_output/$motif/$pot/all_seqs.fasta |
| 118 | + done |
| 119 | + done |
| 120 | +done |
| 121 | +conda deactivate |
| 122 | + |
| 123 | +# relax designs |
| 124 | +conda activate pyrosetta |
| 125 | + |
| 126 | +for motif in de_novo site binding; |
| 127 | +do |
| 128 | + for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0; |
| 129 | + do |
| 130 | + mkdir example_outputs/na/proteinmpnn_output/${motif}/${pot}/relaxed |
| 131 | + python $RFDIFFUSION_PATH/scripts/fast_relax.py -pdbdir example_outputs/na/proteinmpnn_output/${motif}/${pot}/backbones \ |
| 132 | + -outdir example_outputs/na/proteinmpnn_output/${motif}/${pot}/relaxed -parallel |
| 133 | + done |
| 134 | +done |
| 135 | + |
| 136 | +conda deactivate |
| 137 | + |
| 138 | +# calculate secondary structures |
| 139 | + |
| 140 | +DSSP_BIN=/home/domain/anur/progs/bin/dsspcmbi |
| 141 | +mkdir example_outputs/na/dssp_statistics |
| 142 | + |
| 143 | +for motif in de_novo site binding; |
| 144 | +do |
| 145 | + mkdir example_outputs/na/dssp_statistics/$motif/ |
| 146 | + for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0; |
| 147 | + do |
| 148 | + mkdir example_outputs/na/dssp_statistics/$motif//$pot |
| 149 | + for f in `ls example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/| grep pdb` |
| 150 | + do |
| 151 | + $DSSP_BIN example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/$f example_outputs/na/dssp_statistics/$motif//$pot/$f.dssp |
| 152 | + done |
| 153 | + done |
| 154 | +done |
| 155 | + |
| 156 | + |
| 157 | +# calculate NA binding surfaces |
| 158 | + |
| 159 | +conda activate dmasif |
| 160 | +DMASIF_PATH=../../masif_npi |
| 161 | + |
| 162 | +mkdir example_outputs/na/dmasif_output/ |
| 163 | + |
| 164 | +for motif in de_novo site binding; |
| 165 | +do |
| 166 | + mkdir example_outputs/na/dmasif_output/$motif/ |
| 167 | + for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0; |
| 168 | + do |
| 169 | + mkdir example_outputs/na/dmasif_output/$motif/$pot |
| 170 | + ls example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/ | awk '{print $1,"A","CD"}' > example_outputs/na/dmasif_output/$motif/$pot.list |
| 171 | + python3 $DMASIF_PATH/train_inf.py inference --experiment_name npi_site_b2 --site --na NA --protonate \ |
| 172 | + --pdb_list example_outputs/na/dmasif_output/$motif/$pot.list --data_dir example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/ \ |
| 173 | + --out_dir example_outputs/na/dmasif_output/$motif/$pot |
| 174 | + done |
| 175 | + done |
| 176 | +done |
| 177 | + |
| 178 | +END_COMMENT |
| 179 | + |
| 180 | + |
| 181 | +# fold complexes using boltz2 |
| 182 | + |
| 183 | +conda activate boltz2 |
| 184 | + |
| 185 | +mkdir example_outputs/na/boltz_predictions |
| 186 | + |
| 187 | +for motif in de_novo; |
| 188 | +do |
| 189 | + mkdir example_outputs/na/boltz_predictions/$motif/ |
| 190 | + for pot in na_contacts_px0; |
| 191 | + do |
| 192 | + mkdir example_outputs/na/boltz_predictions/$motif/$pot |
| 193 | + mkdir example_outputs/na/boltz_predictions/$pot/conf |
| 194 | + |
| 195 | + cat example_outputs/na/proteinmpnn_output/$motif/$pot/all_seqs.fasta | while read name; read seq |
| 196 | + do |
| 197 | + echo ${name:1} |
| 198 | + cat > example_outputs/na/boltz_predictions/$motif/$pot/conf/${name:1}.yaml << EOL |
| 199 | +sequences: |
| 200 | + - dna: |
| 201 | + id: C |
| 202 | + sequence: GCAGAACGTCGTGAGACAGTTCCG |
| 203 | + - dna: |
| 204 | + id: D |
| 205 | + sequence: CGGAACTGTCTCACGACGTTCTGC |
| 206 | + - protein: |
| 207 | + id: A |
| 208 | + msa: empty |
| 209 | +EOL |
| 210 | + echo ' sequence:' ${seq} >> example_outputs/na/boltz_predictions/$motif/$pot/conf/${name:1}.yaml |
| 211 | + done |
| 212 | + boltz predict example_outputs/na/boltz_predictions/$motif/$pot/conf/ --out_dir example_outputs/na/boltz_predictions/$motif/$pot/ --output_format pdb |
| 213 | + done |
| 214 | +done |
| 215 | + |
| 216 | +conda deactivate |
0 commit comments