Skip to content

Commit 89ed160

Browse files
committed
more aggresive pruning of torsion fragments
1 parent 32738ed commit 89ed160

File tree

1 file changed

+43
-113
lines changed

1 file changed

+43
-113
lines changed

PoltypeModules/lTorsionFragmentPostProcessing.py

Lines changed: 43 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -165,38 +165,40 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
165165
new_order.insert(2, second_idx)
166166
tmp_mol = Chem.RenumberAtoms(tmp_mol, new_order)
167167
rdmolfiles.MolToMolFile(tmp_mol, f'{fname}_post.mol')
168-
sdffile = f"{fname}_post.mol"
168+
sdffile = f"{fname}_post.mol"
169169
mol = Chem.MolFromMolFile(sdffile,removeHs=False)
170170

171171
# we have the "correct" fragment to work on
172172
# below we focus on trimming the fragment
173-
173+
174174
# all single bonds are eligible to cut,
175175
single_bonds = []
176176
pattern = Chem.MolFromSmarts('[*;!#1][*;!#1]')
177177
matches = mol.GetSubstructMatches(pattern, uniquify=False)
178178
for match in matches:
179-
single_bonds.append(list(match))
179+
match = list(match)
180+
match.sort()
181+
if match not in single_bonds:
182+
single_bonds.append(match)
180183

181-
# except non-trivalence nitrogen
184+
# record non-trivalence nitrogen
182185
special_nitrogen = []
183186
pattern = Chem.MolFromSmarts('[#7X2][*]')
184187
matches = mol.GetSubstructMatches(pattern, uniquify=False)
185188
for match in matches:
186189
special_nitrogen.append([match[0], match[1]])
187190
special_nitrogen.append([match[1], match[0]])
188191

189-
190-
# record the aromatic nitrogen (n-*),
191-
aromatic_nitrogen = {}
192-
pattern = Chem.MolFromSmarts('[n][*]')
192+
# record NSP and its connections
193+
nsp_atoms = {}
194+
pattern = Chem.MolFromSmarts('[#7,#15,#16][*]')
193195
matches = mol.GetSubstructMatches(pattern, uniquify=False)
194196
for match in matches:
195-
n, x = match
196-
if n not in aromatic_nitrogen.keys():
197-
aromatic_nitrogen[n] = [x]
197+
nsp, x = match
198+
if nsp not in nsp_atoms.keys():
199+
nsp_atoms[nsp] = [x]
198200
else:
199-
aromatic_nitrogen[n] += [x]
201+
nsp_atoms[nsp] += [x]
200202

201203
# get the atom indices of torsion
202204
torsion_idx_list = []
@@ -213,54 +215,6 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
213215
if neig_idx not in torsion_idx_list:
214216
torsion_idx_list.append(neig_idx)
215217

216-
# OLD decision
217-
# Move the polar group from ortho to para position
218-
# if there are two polar groups on two ortho- site,
219-
# only the first-appearing one (smaller index) will
220-
# be moved, and the second one will be deleted
221-
# Chengwen Liu
222-
# Oct 2024
223-
224-
#atomsInSixMemRing = []
225-
#pattern = Chem.MolFromSmarts('[a;r6]')
226-
#matches = mol.GetSubstructMatches(pattern)
227-
#for match in matches:
228-
# if len(match) == 1:
229-
# atomsInSixMemRing.append(match[0])
230-
#
231-
#ortho_atom = []
232-
#paraH = []
233-
#pattern = Chem.MolFromSmarts('[*;!#1][R][R][R][R][H]')
234-
#matches = mol.GetSubstructMatches(pattern, uniquify=False)
235-
#for match in matches:
236-
# mat = [match[0], match[1]]
237-
# if mat == [1,2]: # poltype always uses 1,2
238-
# if (match[2] not in ortho_atom) and (match[2] in atomsInSixMemRing):
239-
# ortho_atom.append(match[2])
240-
# paraH.append(match[5])
241-
# if mat == [2,1]: # poltype always uses 1,2
242-
# if (match[2] not in ortho_atom) and (match[2] in atomsInSixMemRing):
243-
# ortho_atom.append(match[2])
244-
# paraH.append(match[5])
245-
#
246-
#ortho2paraH = dict(zip(ortho_atom, paraH))
247-
#
248-
## match -OH,-NH2,-SH,-F,-Cl,-Br,-I
249-
#pattern = Chem.MolFromSmarts('[a][O,N,S,F,Cl,Br,I]')
250-
#matches = mol.GetSubstructMatches(pattern, uniquify=False)
251-
#mol_ed = Chem.EditableMol(mol)
252-
#tmp_add = []
253-
#for match in matches:
254-
# if match[0] in ortho_atom:
255-
# paraH_idx = ortho2paraH[match[0]]
256-
# if paraH_idx not in tmp_add:
257-
# atomicNum = mol.GetAtomWithIdx(match[1]).GetAtomicNum()
258-
# add_atom = Chem.Atom(atomicNum)
259-
# mol_ed.ReplaceAtom(paraH_idx, add_atom)
260-
# tmp_add.append(paraH_idx)
261-
#mol = mol_ed.GetMol()
262-
#Chem.SanitizeMol(mol)
263-
264218
# construct a graph based on connectivity
265219
g = nx.Graph()
266220
nodes = []
@@ -290,11 +244,20 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
290244
atomNum = neig.GetAtomicNum()
291245
if (not sameRing) and (atomNum != 1):
292246
pair = [idx, neig_idx]
293-
pair_r = [neig_idx, idx]
294-
if not set(pair).issubset(set(torsion_idx_list)) and (pair in single_bonds or pair_r in single_bonds) and (pair not in special_nitrogen):
295-
spair = sorted([idx, neig_idx])
296-
if spair not in sub_bonds:
297-
sub_bonds.append(spair)
247+
pair.sort()
248+
if not set(pair).issubset(set(torsion_idx_list)) and (pair in single_bonds) and (pair not in special_nitrogen):
249+
if pair not in sub_bonds:
250+
sub_bonds.append(pair)
251+
# deal with the case when atom not in ring
252+
else:
253+
for neig in a.GetNeighbors():
254+
neig_idx = neig.GetIdx()
255+
pair = [idx, neig_idx]
256+
pair.sort()
257+
if not set(pair).issubset(set(torsion_idx_list)) and (pair in single_bonds) and (pair not in special_nitrogen):
258+
if pair not in sub_bonds:
259+
sub_bonds.append(pair)
260+
298261
for sub_bond in sub_bonds:
299262
a1, a2 = sub_bond
300263
g.remove_edge(a1, a2)
@@ -305,46 +268,6 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
305268
for m in frags:
306269
if not set(torsion_idx_list).issubset(list(m)):
307270
atomsToRemove += list(m)
308-
# Replace CH3 with H
309-
pattern = Chem.MolFromSmarts('[*][CH3]([H])([H])[H]')
310-
matches = mol.GetSubstructMatches(pattern, uniquify=False)
311-
for match in matches:
312-
x, c, h1, h2, h3 = match
313-
if c not in torsion_idx_list:
314-
atomsToRemove += [c, h1, h2, h3]
315-
xatom = mol.GetAtomWithIdx(x)
316-
atomic = xatom.GetAtomicNum()
317-
if atomic == 7:
318-
xatom.SetNumExplicitHs(xatom.GetTotalNumHs() + 1)
319-
320-
321-
# Replace CH2CH3 with H
322-
pattern = Chem.MolFromSmarts('[*][CH2]([H])([H])[CH3]([H])([H])[H]')
323-
matches = mol.GetSubstructMatches(pattern, uniquify=False)
324-
for match in matches:
325-
x, c1, h11, h12, c2, h21, h22, h23 = match
326-
if c1 not in torsion_idx_list:
327-
atomsToRemove += [c1, h11, h12, c2, h21, h22, h23]
328-
xatom = mol.GetAtomWithIdx(x)
329-
atomic = xatom.GetAtomicNum()
330-
if atomic == 7:
331-
xatom.SetNumExplicitHs(xatom.GetTotalNumHs() + 1)
332-
333-
# Replace NH3+ with H
334-
pattern = Chem.MolFromSmarts('[*][N+]([H])([H])[H]')
335-
matches = mol.GetSubstructMatches(pattern, uniquify=False)
336-
for match in matches:
337-
x, n, h1, h2, h3 = match
338-
if n not in torsion_idx_list:
339-
atomsToRemove += [n, h1, h2, h3]
340-
341-
# Replace NH2 with H
342-
pattern = Chem.MolFromSmarts('[*][N]([H])[H]')
343-
matches = mol.GetSubstructMatches(pattern, uniquify=False)
344-
for match in matches:
345-
x, n, h1, h2 = match
346-
if n not in torsion_idx_list:
347-
atomsToRemove += [n, h1, h2]
348271

349272
# Remove atoms on distant fused rings
350273
ring_info = mol.GetRingInfo()
@@ -404,25 +327,32 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
404327
atomsToRemove = []
405328
break
406329

407-
# if aromatic nitrogen misses connections
408-
# after cut, add one hydrogen atom
330+
# if NSP atoms miss connections
331+
# after cut, add hydrogen atom per bond
409332
# this should be done in mol (before cut)
410333
# since the atom order will be different after cut
411334
# Chengwen Liu
412335
# Oct 2024
413-
414-
for idx, connected_atoms in aromatic_nitrogen.items():
336+
nsp_bond = {}
337+
for idx, connected_atoms in nsp_atoms.items():
415338
for atm in connected_atoms:
416339
if (atm in atomsToRemove) and (idx not in atomsToRemove):
417-
nitrogen = mol.GetAtomWithIdx(idx)
418-
print(f'Setting number of attached hydrogen to be 1 on atom {idx}')
419-
nitrogen.SetNumExplicitHs(1)
340+
if idx not in nsp_bond.keys():
341+
nsp_bond[idx] = 1
342+
else:
343+
nsp_bond[idx] += 1
420344

345+
# Set correct number of hydrogen on NSP atoms
346+
for idx, nbond in nsp_bond.items():
347+
natom = mol.GetAtomWithIdx(idx)
348+
print(f'Setting number of attached hydrogen to be {nbond} on atom {idx}')
349+
natom.SetNumExplicitHs(nbond)
350+
421351
emol = Chem.EditableMol(mol)
422352
atomsToRemove.sort(reverse=True)
423353
for atom in atomsToRemove:
424354
emol.RemoveAtom(atom)
425-
355+
426356
mol2 = emol.GetMol()
427357
Chem.SanitizeMol(mol2)
428358
mol2h = Chem.AddHs(mol2,addCoords=True)

0 commit comments

Comments
 (0)