Skip to content

Commit 5e1ebbe

Browse files
committed
na binding benchmark
1 parent 4cc4864 commit 5e1ebbe

5 files changed

Lines changed: 6270 additions & 62 deletions

File tree

examples/design_dna_binders.sh

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#!/bin/bash
2+
3+
# This is a benchmark file to reproduce experiments
4+
5+
source /home/domain/data/prog/miniconda3/etc/profile.d/conda.sh
6+
RFDIFFUSION_PATH=../
7+
8+
conda activate rfdiffusion
9+
10+
# define rfdiffusion parameters
11+
12+
declare -A pot_dict
13+
pot_dict[no_potential]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1"] potentials.guide_scale=0'
14+
pot_dict[na_contacts]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1"] potentials.guide_scale=1'
15+
16+
pot_dict[na_contacts_02avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.2"] potentials.guide_scale=1'
17+
pot_dict[na_contacts_04avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.4"] potentials.guide_scale=1'
18+
pot_dict[na_contacts_06avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.6"] potentials.guide_scale=1'
19+
pot_dict[na_contacts_08avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:0.8"] potentials.guide_scale=1'
20+
pot_dict[na_contacts_avg]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,smooth:1"] potentials.guide_scale=1'
21+
22+
pot_dict[na_contacts_px0]='potentials.guiding_potentials=["type:na_contacts,s:2,r_0:8,rep_r_0:7,rep_s:2,d_0:2,rep_r_min:1,predicted:1"] potentials.guide_scale=1'
23+
24+
declare -A motif_dict
25+
motif_dict[de_novo]="contigmap.contigs=[123-183]"
26+
motif_dict[site]="contigmap.contigs=[40-60/A21-22/40-60/B222-222/40-60] inference.ckpt_override_path=$RFDIFFUSION_PATH/models/ActiveSite_ckpt.pt"
27+
motif_dict[binding]="contigmap.contigs=[18-18/A24-24/18-18/A43-43/27-27/A71-71/3-3/A75-75/5-5/A81-81/85-85] inference.ckpt_override_path=$RFDIFFUSION_PATH/models/ActiveSite_ckpt.pt"
28+
29+
mkdir example_outputs/na/
30+
mkdir example_outputs/na/rfdiffusion_outputs
31+
32+
# run rfdiffusion
33+
for motif in de_novo site binding;
34+
do
35+
mkdir example_outputs/na/rfdiffusion_outputs/$motif
36+
for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0;
37+
do
38+
echo imsoi $motif $pot >> logs/test_na.log
39+
mkdir example_outputs/na/rfdiffusion_outputs/$motif/$pot
40+
$RFDIFFUSION_PATH/scripts/run_inference.py inference.num_designs=50 inference.deterministic=true \
41+
inference.output_prefix=example_outputs/na/rfdiffusion_outputs/$motif/$pot/imsoi \
42+
"inference.input_pdb=input_pdbs/1M5X.pdb" \
43+
${motif_dict[$motif]} ${pot_dict[$pot]} >> example_outputs/na/rfdiffusion_outputs/test_na.log
44+
done
45+
done
46+
47+
conda deactivate
48+
49+
# add NA structures to designs
50+
51+
for motif in de_novo site binding;
52+
do
53+
for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0;
54+
do
55+
for lig in `ls example_outputs/na/rfdiffusion_outputs/$motif/$pot| grep na`
56+
do
57+
cat example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//') | egrep -v 'DA|DG|DC|DT' > tmp
58+
cat tmp > example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//')
59+
numatoms=$(tail -1 example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//') | head -c 11 | tail -c 4 | xargs)
60+
cat example_outputs/na/rfdiffusion_outputs/$motif/$pot/$lig | \
61+
awk -v var=$numatoms 'BEGIN { FIELDWIDTHS="6 5 2 1 54"}{printf "%s%5u%s%s%s%s %s\n", $1,$2 + var,$3,$4,$5,$6,$4}' \
62+
>> example_outputs/na/rfdiffusion_outputs/$motif/$pot/$(echo $lig | sed 's/_na//')
63+
done
64+
done
65+
done
66+
67+
68+
# running LigandMPNN
69+
LIGANDMPNN_PATH=../../LigandMPNN/
70+
WD=`pwd`
71+
72+
conda activate ligandmpnn_env
73+
74+
mkdir example_outputs/na/proteinmpnn_output
75+
76+
for motif in de_novo site binding;
77+
do
78+
mkdir example_outputs/na/proteinmpnn_output/$motif
79+
80+
for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0;
81+
do
82+
# generate json files for ligandmpnn multiple run
83+
cd $WD
84+
pdb_ids=$WD/example_outputs/na/proteinmpnn_output/$motif/${pot}_ids.json
85+
redesigned_residues=$WD/example_outputs/na/proteinmpnn_output/$motif/${pot}_res.json
86+
echo { > ${pdb_ids}
87+
echo { > ${redesigned_residues}
88+
89+
itdir=$WD/example_outputs/na/proteinmpnn_output/$motif/$pot
90+
for pdb in `ls $itdir | grep pdb | grep -v na `;
91+
do
92+
echo \"$itdir/$pdb\": \"\", >> ${pdb_ids}
93+
echo \"$itdir/$pdb\": \"$(cat $itdir/$pdb | grep 'CA GLY A' | grep '0.00$' | awk 'BEGIN{OFS="";ORS=" "}{print $5, $6}' | sed 's/\ $//' )\", >> ${redesigned_residues}
94+
done
95+
96+
tmp=`cat ${pdb_ids} | sed '$ s/\,$/\n}/'`
97+
echo $tmp > ${pdb_ids}
98+
tmp=`cat ${redesigned_residues} | sed '$ s/\,$/\n}/'`
99+
echo $tmp > ${redesigned_residues}
100+
101+
# run ligandmpnn
102+
cd $LIGANDMPNN_PATH
103+
python3 run.py \
104+
--pdb_path_multi ${pdb_ids} \
105+
--out_folder $WD/example_outputs/na/proteinmpnn_output/$motif/$pot \
106+
--model_type "ligand_mpnn" \
107+
--batch_size 5 --pack_side_chains 1 \
108+
--number_of_packs_per_design 1 \
109+
--pack_with_ligand_context 1
110+
--redesigned_residues_multi ${redesigned_residues}
111+
112+
# write ligandmpnn output sequences to a single file
113+
cd $WD
114+
for pdb in `ls example_outputs/na/proteinmpnn_output/$motif/$pot/seqs | grep fa` ;
115+
do
116+
echo $pdb
117+
cat example_outputs/na/proteinmpnn_output/$motif/$pot/seqs/${pdb} | awk '{print $1, $2}' | grep id= -A 1 | sed 's/,\ id=/_/' | sed 's/,//' >> example_outputs/na/proteinmpnn_output/$motif/$pot/all_seqs.fasta
118+
done
119+
done
120+
done
121+
conda deactivate
122+
123+
# relax designs
124+
conda activate pyrosetta
125+
126+
for motif in de_novo site binding;
127+
do
128+
for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0;
129+
do
130+
mkdir example_outputs/na/proteinmpnn_output/${motif}/${pot}/relaxed
131+
python $RFDIFFUSION_PATH/scripts/fast_relax.py -pdbdir example_outputs/na/proteinmpnn_output/${motif}/${pot}/backbones \
132+
-outdir example_outputs/na/proteinmpnn_output/${motif}/${pot}/relaxed -parallel
133+
done
134+
done
135+
136+
conda deactivate
137+
138+
# calculate secondary structures
139+
140+
DSSP_BIN=/home/domain/anur/progs/bin/dsspcmbi
141+
mkdir example_outputs/na/dssp_statistics
142+
143+
for motif in de_novo site binding;
144+
do
145+
mkdir example_outputs/na/dssp_statistics/$motif/
146+
for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0;
147+
do
148+
mkdir example_outputs/na/dssp_statistics/$motif//$pot
149+
for f in `ls example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/| grep pdb`
150+
do
151+
$DSSP_BIN example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/$f example_outputs/na/dssp_statistics/$motif//$pot/$f.dssp
152+
done
153+
done
154+
done
155+
156+
157+
# calculate NA binding surfaces
158+
159+
conda activate dmasif
160+
DMASIF_PATH=../../masif_npi
161+
162+
mkdir example_outputs/na/dmasif_output/
163+
164+
for motif in de_novo site binding;
165+
do
166+
mkdir example_outputs/na/dmasif_output/$motif/
167+
for pot in no_potential na_contacts na_contacts_02avg na_contacts_04avg na_contacts_06avg na_contacts_08avg na_contacts_avg na_contacts_px0;
168+
do
169+
mkdir example_outputs/na/dmasif_output/$motif/$pot
170+
ls example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/ | awk '{print $1,"A","CD"}' > example_outputs/na/dmasif_output/$motif/$pot.list
171+
python3 $DMASIF_PATH/train_inf.py inference --experiment_name npi_site_b2 --site --na NA --protonate \
172+
--pdb_list example_outputs/na/dmasif_output/$motif/$pot.list --data_dir example_outputs/na/proteinmpnn_output/$motif/$pot/relaxed/ \
173+
--out_dir example_outputs/na/dmasif_output/$motif/$pot
174+
done
175+
done
176+
done
177+
178+
END_COMMENT
179+
180+
181+
# fold complexes using boltz2
182+
183+
conda activate boltz2
184+
185+
mkdir example_outputs/na/boltz_predictions
186+
187+
for motif in de_novo;
188+
do
189+
mkdir example_outputs/na/boltz_predictions/$motif/
190+
for pot in na_contacts_px0;
191+
do
192+
mkdir example_outputs/na/boltz_predictions/$motif/$pot
193+
mkdir example_outputs/na/boltz_predictions/$pot/conf
194+
195+
cat example_outputs/na/proteinmpnn_output/$motif/$pot/all_seqs.fasta | while read name; read seq
196+
do
197+
echo ${name:1}
198+
cat > example_outputs/na/boltz_predictions/$motif/$pot/conf/${name:1}.yaml << EOL
199+
sequences:
200+
- dna:
201+
id: C
202+
sequence: GCAGAACGTCGTGAGACAGTTCCG
203+
- dna:
204+
id: D
205+
sequence: CGGAACTGTCTCACGACGTTCTGC
206+
- protein:
207+
id: A
208+
msa: empty
209+
EOL
210+
echo ' sequence:' ${seq} >> example_outputs/na/boltz_predictions/$motif/$pot/conf/${name:1}.yaml
211+
done
212+
boltz predict example_outputs/na/boltz_predictions/$motif/$pot/conf/ --out_dir example_outputs/na/boltz_predictions/$motif/$pot/ --output_format pdb
213+
done
214+
done
215+
216+
conda deactivate

0 commit comments

Comments
 (0)