Skip to content

Commit c2a2478

Browse files
committed
added tools to fragment_utils.py which can be used to automatically generate partial reattachment reactions
1 parent 61ffa80 commit c2a2478

2 files changed

Lines changed: 287 additions & 43 deletions

File tree

rmgpy/molecule/fragment.py

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,49 +1089,52 @@ def sliceitup_aliph(self, molecule, size_threshold=5):
10891089
# mol_set contains new set of fragments
10901090
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
10911091
# check all fragments' size
1092-
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
1093-
if len(mol_set) == 2:
1094-
frag1 = Chem.MolToSmiles(mol_set[0])
1095-
frag2 = Chem.MolToSmiles(mol_set[1])
1096-
1097-
frag1_R = frag1.count("Na")
1098-
frag1_L = frag1.count("K")
1099-
frag2_R = frag2.count("Na")
1100-
frag2_L = frag2.count("K")
1101-
1102-
if frag1_R > frag2_R and frag1_L <= frag2_L:
1103-
frag1_smi = frag1.replace("*", "L")
1104-
frag2_smi = frag2.replace("*", "R")
1105-
elif frag1_L > frag2_L and frag1_R <= frag2_R:
1106-
frag1_smi = frag1.replace("*", "R")
1107-
frag2_smi = frag2.replace("*", "L")
1108-
elif frag2_L > frag1_L and frag2_R <= frag1_R:
1109-
frag1_smi = frag1.replace("*", "R")
1110-
frag2_smi = frag2.replace("*", "L")
1111-
elif frag2_R > frag1_R and frag2_L <= frag1_L:
1112-
frag1_smi = frag1.replace("*", "R")
1113-
frag2_smi = frag2.replace("*", "L")
1114-
elif randint(0,1)==1:
1115-
frag1_smi = frag1.replace("*", "L")
1116-
frag2_smi = frag2.replace("*", "R")
1117-
else:
1118-
frag1_smi = frag1.replace("*", "R")
1119-
frag2_smi = frag2.replace("*", "L")
1120-
1121-
frag_list = [frag1_smi, frag2_smi]
1122-
1123-
elif len(mol_set) > 2: # means it cut into 3 fragments
1124-
frag_list = []
1125-
for ind, rdmol in enumerate(mol_set):
1126-
frag = Chem.MolToSmiles(rdmol)
1127-
if frag.count("*") > 1:
1128-
frag_smi = frag.replace("*", "R")
1092+
try:
1093+
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
1094+
if len(mol_set) == 2:
1095+
frag1 = Chem.MolToSmiles(mol_set[0])
1096+
frag2 = Chem.MolToSmiles(mol_set[1])
1097+
1098+
frag1_R = frag1.count("Na")
1099+
frag1_L = frag1.count("K")
1100+
frag2_R = frag2.count("Na")
1101+
frag2_L = frag2.count("K")
1102+
1103+
if frag1_R > frag2_R and frag1_L <= frag2_L:
1104+
frag1_smi = frag1.replace("*", "L")
1105+
frag2_smi = frag2.replace("*", "R")
1106+
elif frag1_L > frag2_L and frag1_R <= frag2_R:
1107+
frag1_smi = frag1.replace("*", "R")
1108+
frag2_smi = frag2.replace("*", "L")
1109+
elif frag2_L > frag1_L and frag2_R <= frag1_R:
1110+
frag1_smi = frag1.replace("*", "R")
1111+
frag2_smi = frag2.replace("*", "L")
1112+
elif frag2_R > frag1_R and frag2_L <= frag1_L:
1113+
frag1_smi = frag1.replace("*", "R")
1114+
frag2_smi = frag2.replace("*", "L")
1115+
elif randint(0,1)==1:
1116+
frag1_smi = frag1.replace("*", "L")
1117+
frag2_smi = frag2.replace("*", "R")
11291118
else:
1130-
frag_smi = frag.replace("*", "L")
1131-
frag_list.append(frag_smi)
1132-
break
1133-
else:
1134-
# turn to next matched_atom_map
1119+
frag1_smi = frag1.replace("*", "R")
1120+
frag2_smi = frag2.replace("*", "L")
1121+
1122+
frag_list = [frag1_smi, frag2_smi]
1123+
1124+
elif len(mol_set) > 2: # means it cut into 3 fragments
1125+
frag_list = []
1126+
for ind, rdmol in enumerate(mol_set):
1127+
frag = Chem.MolToSmiles(rdmol)
1128+
if frag.count("*") > 1:
1129+
frag_smi = frag.replace("*", "R")
1130+
else:
1131+
frag_smi = frag.replace("*", "L")
1132+
frag_list.append(frag_smi)
1133+
break
1134+
else:
1135+
# turn to next matched_atom_map
1136+
continue
1137+
except:
11351138
continue
11361139
else:
11371140
# no match for this pattern

rmgpy/molecule/fragment_utils.py

Lines changed: 242 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from rmgpy.molecule.fragment import Fragment
1+
from rmgpy.molecule.fragment import Fragment,CuttingLabel
2+
from rmgpy.molecule.molecule import Bond
3+
from rdkit import Chem
4+
from numpy.random import randint
25
from rmgpy.tools.canteramodel import Cantera
36
from rmgpy.chemkin import load_chemkin_file
47
import re
@@ -310,3 +313,241 @@ def merge_frag_list(to_be_merged):
310313
# print('{}% of fragments fully merged...'.format(np.round(100*(i+1)/len(flattened_matches_random)),1))
311314
# print(newfraglist)
312315
return newfraglist
316+
317+
import re
318+
def check_if_radical_near_cutting_label(species_smiles):
319+
species_fragment = Fragment().from_smiles_like_string(species_smiles)
320+
for i, atom in enumerate(species_fragment.atoms):
321+
species_fragment.atoms[i].id = i
322+
for atom in species_fragment.atoms:
323+
if atom.radical_electrons ==1:
324+
bonded_atoms = list(atom.bonds.keys())
325+
bonded_atom_types = [type(x) for x in bonded_atoms]
326+
if CuttingLabel in bonded_atom_types:
327+
radical_near_cutting_label = atom
328+
nearest_cutting_label = bonded_atoms[bonded_atom_types.index(CuttingLabel)]
329+
return species_fragment, nearest_cutting_label
330+
for atom2 in bonded_atoms:
331+
bonded_atoms_2 = list(atom2.bonds.keys())
332+
bonded_atom_types_2 = [type(x) for x in bonded_atoms_2]
333+
if CuttingLabel in bonded_atom_types_2:
334+
radical_near_cutting_label = atom2
335+
nearest_cutting_label = bonded_atoms_2[bonded_atom_types_2.index(CuttingLabel)]
336+
return species_fragment, nearest_cutting_label
337+
else:
338+
return False
339+
340+
def check_if_pibond_near_cutting_label(species_smiles):
341+
species_fragment = Fragment().from_smiles_like_string(species_smiles)
342+
for i, atom in enumerate(species_fragment.atoms):
343+
species_fragment.atoms[i].id = i
344+
for atom in species_fragment.atoms:
345+
neighboring_bond_orders = [x.order for x in atom.bonds.values()]
346+
if 2 in neighboring_bond_orders:
347+
bonded_atoms = list(atom.bonds.keys())
348+
bonded_atom_types = [type(x) for x in bonded_atoms]
349+
if CuttingLabel in bonded_atom_types:
350+
nearest_cutting_label = bonded_atoms[bonded_atom_types.index(CuttingLabel)]
351+
return species_fragment, nearest_cutting_label
352+
for atom2 in bonded_atoms:
353+
bonded_atoms_2 = list(atom2.bonds.keys())
354+
bonded_atom_types_2 = [type(x) for x in bonded_atoms_2]
355+
if CuttingLabel in bonded_atom_types_2:
356+
nearest_cutting_label = bonded_atoms_2[bonded_atom_types_2.index(CuttingLabel)]
357+
return species_fragment, nearest_cutting_label
358+
else:
359+
return False
360+
361+
def merge_fragment_a_to_cutting_label_on_b(smiles_a,smiles_b,cuttinglabel):
362+
a_frag = Fragment().from_smiles_like_string(smiles_a)
363+
b_frag = Fragment().from_smiles_like_string(smiles_b)
364+
if "RL".replace(cuttinglabel.symbol,"") not in [x.symbol for x in a_frag.atoms]:
365+
return "ERROR"
366+
for vertex in b_frag.vertices:
367+
if isinstance(vertex, CuttingLabel):
368+
369+
if vertex.symbol == cuttinglabel.symbol and str(vertex.bonds) == str(cuttinglabel.bonds):
370+
cutb = vertex
371+
372+
atom2 = list(cutb.edges.keys())[0]
373+
b_frag.remove_atom(cutb)
374+
break
375+
if cutb.symbol[0] == 'L':
376+
Ctl = cutb.symbol.replace('L', 'R')
377+
else: # that means this CuttingLabel is R something
378+
Ctl = cutb.symbol.replace('R', 'L')
379+
380+
# merge to frag_spe1
381+
for vertex in a_frag.vertices:
382+
if isinstance(vertex, CuttingLabel):
383+
if vertex.symbol == Ctl:
384+
cuta = vertex
385+
atom1 = list(cuta.edges.keys())[0]
386+
a_frag.remove_atom(cuta)
387+
break
388+
389+
# new merged fragment
390+
new_frag = a_frag.merge(b_frag)
391+
new_frag.add_bond(Bond(atom1=atom1, atom2=atom2, order=1))
392+
new_frag = new_frag.copy(deep=True)
393+
new_frag.update()
394+
for i, atom in enumerate(new_frag.atoms):
395+
new_frag.atoms[i].id = i
396+
return new_frag # return Fragment obtl
397+
398+
def get_single_cc_bonds(frag):
399+
single_cc_bonds = [tuple(sorted([x.atom1.id, x.atom2.id])) for x in frag.get_all_edges() if x.atom1.symbol =="C" and x.atom2.symbol=="C" and x.order ==1]
400+
return frag, single_cc_bonds
401+
402+
def get_atoms_neighboring_bond(frag, bond_idx):
403+
atom1id,atom2id = bond_idx
404+
atom1 = frag.atoms[atom1id]
405+
atom2 = frag.atoms[atom2id]
406+
neighboring_atoms = [x for x in list(set(list(atom1.bonds.keys())+list(atom2.bonds.keys()))) if x!=atom1 and x!=atom2]
407+
return frag, neighboring_atoms
408+
409+
def flatten_list_of_lists(list_of_lists):
410+
return [item for lst in list_of_lists for item in lst]
411+
412+
def get_atoms_nearby_bond(frag, bond_idx):
413+
atom1id,atom2id = bond_idx
414+
atom1 = frag.atoms[atom1id]
415+
atom2 = frag.atoms[atom2id]
416+
frag, neighboring_atoms = get_atoms_neighboring_bond(frag, bond_idx)
417+
neighboring_atoms = [x for x in neighboring_atoms]
418+
neighboring_atoms_ = []
419+
for atom in neighboring_atoms:
420+
neighboring_atoms_ += [x for x in atom.bonds.keys() ]
421+
422+
nearby_atoms = list(set(neighboring_atoms + neighboring_atoms_))
423+
return frag, nearby_atoms
424+
425+
def cut_specific_bond(frag, cut_bond_idx):
426+
427+
rdfrag, atom_mapping = frag.to_rdkit_mol(remove_h=False, return_mapping=True, save_order=True)
428+
429+
atom_mapping_flipped = {v:k for k,v in atom_mapping.items()}
430+
431+
432+
rdfrag,atom_mapping_flipped = replace_cutting_labels_with_metals(rdfrag, atom_mapping_flipped)
433+
atom1 = atom_mapping[frag.atoms[cut_bond_idx[0]]]
434+
atom2 = atom_mapping[frag.atoms[cut_bond_idx[1]]]
435+
436+
bond_to_cut = rdfrag.GetBondBetweenAtoms(atom1,atom2)
437+
438+
frag_list = cut_and_place_cutting_labels(rdfrag, bond_to_cut)
439+
return frag_list
440+
441+
def replace_cutting_labels_with_metals(rdfrag,atom_mapping_flipped):
442+
for idx,atom in atom_mapping_flipped.items():
443+
if isinstance(atom, CuttingLabel):
444+
if atom.symbol == "R":
445+
rdfrag.GetAtomWithIdx(idx).SetAtomicNum(11) # [Na]
446+
else:
447+
rdfrag.GetAtomWithIdx(idx).SetAtomicNum(19) # [K]
448+
449+
return rdfrag, atom_mapping_flipped
450+
451+
def cut_and_place_cutting_labels(rdfrag, bond_to_cut):
452+
453+
newmol = Chem.RWMol(rdfrag)
454+
new_mol = Chem.FragmentOnBonds(newmol, [bond_to_cut.GetIdx()], dummyLabels=[(0,0)])
455+
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
456+
457+
if len(mol_set) == 2:
458+
frag1 = Chem.MolToSmiles(mol_set[0])
459+
frag2 = Chem.MolToSmiles(mol_set[1])
460+
461+
frag1_R = frag1.count("Na")
462+
frag1_L = frag1.count("K")
463+
frag2_R = frag2.count("Na")
464+
frag2_L = frag2.count("K")
465+
466+
if frag1_R > frag2_R and frag1_L <= frag2_L:
467+
frag1_smi = frag1.replace("*", "L")
468+
frag2_smi = frag2.replace("*", "R")
469+
elif frag1_L > frag2_L and frag1_R <= frag2_R:
470+
frag1_smi = frag1.replace("*", "R")
471+
frag2_smi = frag2.replace("*", "L")
472+
elif frag2_L > frag1_L and frag2_R <= frag1_R:
473+
frag1_smi = frag1.replace("*", "R")
474+
frag2_smi = frag2.replace("*", "L")
475+
elif frag2_R > frag1_R and frag2_L <= frag1_L:
476+
frag1_smi = frag1.replace("*", "R")
477+
frag2_smi = frag2.replace("*", "L")
478+
elif randint(0,1)==1:
479+
frag1_smi = frag1.replace("*", "L")
480+
frag2_smi = frag2.replace("*", "R")
481+
else:
482+
frag1_smi = frag1.replace("*", "R")
483+
frag2_smi = frag2.replace("*", "L")
484+
frag1_smi = frag1_smi.replace("[Na]", "R")
485+
frag1_smi = frag1_smi.replace("[K]", "L")
486+
frag2_smi = frag2_smi.replace("[Na]","R")
487+
frag2_smi = frag2_smi.replace("[K]","L")
488+
frag_list = [frag1_smi,frag2_smi]
489+
frag_list = [Fragment().from_smiles_like_string(x).smiles for x in frag_list]
490+
491+
return frag_list
492+
493+
def add_starting_fragment_cut_if_needed(species_fragment, nearest_cutting_label, starting_fragment_smiles,species_cutting_threshold = 20):
494+
merged_frag = merge_fragment_a_to_cutting_label_on_b(starting_fragment_smiles,species_fragment.smiles, nearest_cutting_label)
495+
# print("ADD STARTING FRAG - FRAG\n",[([x.id for x in k.bonds.keys()], v) for k,v in {atom:atom.id for atom in merged_frag.atoms}.items()])
496+
merged_frag_smiles = merged_frag.smiles
497+
if merged_frag_smiles.count("C") + merged_frag_smiles.count("c") > species_cutting_threshold:
498+
cuttable_bonds = return_cuttable_bonds(merged_frag)
499+
options = []
500+
for cuttable_bond_idx_tuple in cuttable_bonds:
501+
frag1smi, frag2smi = cut_specific_bond(merged_frag, cuttable_bond_idx_tuple)
502+
options.append([frag1smi,frag2smi])
503+
504+
return options
505+
else:
506+
return [[merged_frag_smiles]]
507+
508+
def check_if_bond_is_cuttable(frag, bond_idx_tuple):
509+
frag, nearby_atoms = get_atoms_nearby_bond(frag, bond_idx_tuple)
510+
511+
nearby_bond_orders = flatten_list_of_lists([[x.order for x in atom.bonds.values()] for atom in nearby_atoms])
512+
nearby_cl_bool = (sum([1 for x in nearby_atoms if type(x)==CuttingLabel])>0)
513+
nearby_radical_bool = (sum([x.radical_electrons for x in nearby_atoms]) > 0)
514+
nearby_pi_bond_bool = any(x>1 for x in nearby_bond_orders)
515+
if nearby_radical_bool ==False and nearby_pi_bond_bool == False and nearby_cl_bool ==False:
516+
return True
517+
else:
518+
return False
519+
520+
def return_cuttable_bonds(frag):
521+
# print("RETURN CUTTABLE BONDS - FRAG\n",[([x.id for x in k.bonds.keys()], v) for k,v in {atom:atom.id for atom in frag.atoms}.items()])
522+
frag, single_cc_bonds = get_single_cc_bonds(frag)
523+
524+
# print("SINGLE CC BONDS",single_cc_bonds)
525+
cuttable_bonds = []
526+
for bond_idx_tuple in single_cc_bonds:
527+
cuttable_bool = check_if_bond_is_cuttable(frag, bond_idx_tuple)
528+
if cuttable_bool:
529+
cuttable_bonds.append(bond_idx_tuple)
530+
cuttable_bonds = list(set(cuttable_bonds))
531+
return cuttable_bonds
532+
533+
def process_new_fragment(species_smiles, starting_fragment_smiles,species_cutting_threshold = 20):
534+
535+
radical_result = check_if_radical_near_cutting_label(species_smiles)
536+
537+
538+
if radical_result != False:
539+
species_fragment, nearest_cutting_label = radical_result
540+
options = add_starting_fragment_cut_if_needed(species_fragment, nearest_cutting_label, starting_fragment_smiles,species_cutting_threshold = species_cutting_threshold)
541+
return options
542+
else:
543+
pibond_result = check_if_pibond_near_cutting_label(species_smiles)
544+
if pibond_result!=False:
545+
species_fragment, nearest_cutting_label = pibond_result
546+
options = add_starting_fragment_cut_if_needed(species_fragment, nearest_cutting_label, starting_fragment_smiles,species_cutting_threshold = species_cutting_threshold)
547+
return options
548+
else:
549+
return species_smiles
550+
551+
def make_new_reaction_string(species_smiles, starting_fragment_smiles, frag_list):
552+
frag_list_str = " + ".join(frag_list)
553+
return f"{species_smiles} + {starting_fragment_smiles} => {frag_list_str}"

0 commit comments

Comments
 (0)