@@ -165,38 +165,40 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
165
165
new_order .insert (2 , second_idx )
166
166
tmp_mol = Chem .RenumberAtoms (tmp_mol , new_order )
167
167
rdmolfiles .MolToMolFile (tmp_mol , f'{ fname } _post.mol' )
168
- sdffile = f"{ fname } _post.mol"
168
+ sdffile = f"{ fname } _post.mol"
169
169
mol = Chem .MolFromMolFile (sdffile ,removeHs = False )
170
170
171
171
# we have the "correct" fragment to work on
172
172
# below we focus on trimming the fragment
173
-
173
+
174
174
# all single bonds are eligible to cut,
175
175
single_bonds = []
176
176
pattern = Chem .MolFromSmarts ('[*;!#1][*;!#1]' )
177
177
matches = mol .GetSubstructMatches (pattern , uniquify = False )
178
178
for match in matches :
179
- single_bonds .append (list (match ))
179
+ match = list (match )
180
+ match .sort ()
181
+ if match not in single_bonds :
182
+ single_bonds .append (match )
180
183
181
- # except non-trivalence nitrogen
184
+ # record non-trivalence nitrogen
182
185
special_nitrogen = []
183
186
pattern = Chem .MolFromSmarts ('[#7X2][*]' )
184
187
matches = mol .GetSubstructMatches (pattern , uniquify = False )
185
188
for match in matches :
186
189
special_nitrogen .append ([match [0 ], match [1 ]])
187
190
special_nitrogen .append ([match [1 ], match [0 ]])
188
191
189
-
190
- # record the aromatic nitrogen (n-*),
191
- aromatic_nitrogen = {}
192
- pattern = Chem .MolFromSmarts ('[n][*]' )
192
+ # record NSP and its connections
193
+ nsp_atoms = {}
194
+ pattern = Chem .MolFromSmarts ('[#7,#15,#16][*]' )
193
195
matches = mol .GetSubstructMatches (pattern , uniquify = False )
194
196
for match in matches :
195
- n , x = match
196
- if n not in aromatic_nitrogen .keys ():
197
- aromatic_nitrogen [ n ] = [x ]
197
+ nsp , x = match
198
+ if nsp not in nsp_atoms .keys ():
199
+ nsp_atoms [ nsp ] = [x ]
198
200
else :
199
- aromatic_nitrogen [ n ] += [x ]
201
+ nsp_atoms [ nsp ] += [x ]
200
202
201
203
# get the atom indices of torsion
202
204
torsion_idx_list = []
@@ -213,54 +215,6 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
213
215
if neig_idx not in torsion_idx_list :
214
216
torsion_idx_list .append (neig_idx )
215
217
216
- # OLD decision
217
- # Move the polar group from ortho to para position
218
- # if there are two polar groups on two ortho- site,
219
- # only the first-appearing one (smaller index) will
220
- # be moved, and the second one will be deleted
221
- # Chengwen Liu
222
- # Oct 2024
223
-
224
- #atomsInSixMemRing = []
225
- #pattern = Chem.MolFromSmarts('[a;r6]')
226
- #matches = mol.GetSubstructMatches(pattern)
227
- #for match in matches:
228
- # if len(match) == 1:
229
- # atomsInSixMemRing.append(match[0])
230
- #
231
- #ortho_atom = []
232
- #paraH = []
233
- #pattern = Chem.MolFromSmarts('[*;!#1][R][R][R][R][H]')
234
- #matches = mol.GetSubstructMatches(pattern, uniquify=False)
235
- #for match in matches:
236
- # mat = [match[0], match[1]]
237
- # if mat == [1,2]: # poltype always uses 1,2
238
- # if (match[2] not in ortho_atom) and (match[2] in atomsInSixMemRing):
239
- # ortho_atom.append(match[2])
240
- # paraH.append(match[5])
241
- # if mat == [2,1]: # poltype always uses 1,2
242
- # if (match[2] not in ortho_atom) and (match[2] in atomsInSixMemRing):
243
- # ortho_atom.append(match[2])
244
- # paraH.append(match[5])
245
- #
246
- #ortho2paraH = dict(zip(ortho_atom, paraH))
247
- #
248
- ## match -OH,-NH2,-SH,-F,-Cl,-Br,-I
249
- #pattern = Chem.MolFromSmarts('[a][O,N,S,F,Cl,Br,I]')
250
- #matches = mol.GetSubstructMatches(pattern, uniquify=False)
251
- #mol_ed = Chem.EditableMol(mol)
252
- #tmp_add = []
253
- #for match in matches:
254
- # if match[0] in ortho_atom:
255
- # paraH_idx = ortho2paraH[match[0]]
256
- # if paraH_idx not in tmp_add:
257
- # atomicNum = mol.GetAtomWithIdx(match[1]).GetAtomicNum()
258
- # add_atom = Chem.Atom(atomicNum)
259
- # mol_ed.ReplaceAtom(paraH_idx, add_atom)
260
- # tmp_add.append(paraH_idx)
261
- #mol = mol_ed.GetMol()
262
- #Chem.SanitizeMol(mol)
263
-
264
218
# construct a graph based on connectivity
265
219
g = nx .Graph ()
266
220
nodes = []
@@ -290,11 +244,20 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
290
244
atomNum = neig .GetAtomicNum ()
291
245
if (not sameRing ) and (atomNum != 1 ):
292
246
pair = [idx , neig_idx ]
293
- pair_r = [neig_idx , idx ]
294
- if not set (pair ).issubset (set (torsion_idx_list )) and (pair in single_bonds or pair_r in single_bonds ) and (pair not in special_nitrogen ):
295
- spair = sorted ([idx , neig_idx ])
296
- if spair not in sub_bonds :
297
- sub_bonds .append (spair )
247
+ pair .sort ()
248
+ if not set (pair ).issubset (set (torsion_idx_list )) and (pair in single_bonds ) and (pair not in special_nitrogen ):
249
+ if pair not in sub_bonds :
250
+ sub_bonds .append (pair )
251
+ # deal with the case when atom not in ring
252
+ else :
253
+ for neig in a .GetNeighbors ():
254
+ neig_idx = neig .GetIdx ()
255
+ pair = [idx , neig_idx ]
256
+ pair .sort ()
257
+ if not set (pair ).issubset (set (torsion_idx_list )) and (pair in single_bonds ) and (pair not in special_nitrogen ):
258
+ if pair not in sub_bonds :
259
+ sub_bonds .append (pair )
260
+
298
261
for sub_bond in sub_bonds :
299
262
a1 , a2 = sub_bond
300
263
g .remove_edge (a1 , a2 )
@@ -305,46 +268,6 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
305
268
for m in frags :
306
269
if not set (torsion_idx_list ).issubset (list (m )):
307
270
atomsToRemove += list (m )
308
- # Replace CH3 with H
309
- pattern = Chem .MolFromSmarts ('[*][CH3]([H])([H])[H]' )
310
- matches = mol .GetSubstructMatches (pattern , uniquify = False )
311
- for match in matches :
312
- x , c , h1 , h2 , h3 = match
313
- if c not in torsion_idx_list :
314
- atomsToRemove += [c , h1 , h2 , h3 ]
315
- xatom = mol .GetAtomWithIdx (x )
316
- atomic = xatom .GetAtomicNum ()
317
- if atomic == 7 :
318
- xatom .SetNumExplicitHs (xatom .GetTotalNumHs () + 1 )
319
-
320
-
321
- # Replace CH2CH3 with H
322
- pattern = Chem .MolFromSmarts ('[*][CH2]([H])([H])[CH3]([H])([H])[H]' )
323
- matches = mol .GetSubstructMatches (pattern , uniquify = False )
324
- for match in matches :
325
- x , c1 , h11 , h12 , c2 , h21 , h22 , h23 = match
326
- if c1 not in torsion_idx_list :
327
- atomsToRemove += [c1 , h11 , h12 , c2 , h21 , h22 , h23 ]
328
- xatom = mol .GetAtomWithIdx (x )
329
- atomic = xatom .GetAtomicNum ()
330
- if atomic == 7 :
331
- xatom .SetNumExplicitHs (xatom .GetTotalNumHs () + 1 )
332
-
333
- # Replace NH3+ with H
334
- pattern = Chem .MolFromSmarts ('[*][N+]([H])([H])[H]' )
335
- matches = mol .GetSubstructMatches (pattern , uniquify = False )
336
- for match in matches :
337
- x , n , h1 , h2 , h3 = match
338
- if n not in torsion_idx_list :
339
- atomsToRemove += [n , h1 , h2 , h3 ]
340
-
341
- # Replace NH2 with H
342
- pattern = Chem .MolFromSmarts ('[*][N]([H])[H]' )
343
- matches = mol .GetSubstructMatches (pattern , uniquify = False )
344
- for match in matches :
345
- x , n , h1 , h2 = match
346
- if n not in torsion_idx_list :
347
- atomsToRemove += [n , h1 , h2 ]
348
271
349
272
# Remove atoms on distant fused rings
350
273
ring_info = mol .GetRingInfo ()
@@ -404,25 +327,32 @@ def saveFragment(mol, atomsToKeep,output, xyz2index):
404
327
atomsToRemove = []
405
328
break
406
329
407
- # if aromatic nitrogen misses connections
408
- # after cut, add one hydrogen atom
330
+ # if NSP atoms miss connections
331
+ # after cut, add hydrogen atom per bond
409
332
# this should be done in mol (before cut)
410
333
# since the atom order will be different after cut
411
334
# Chengwen Liu
412
335
# Oct 2024
413
-
414
- for idx , connected_atoms in aromatic_nitrogen .items ():
336
+ nsp_bond = {}
337
+ for idx , connected_atoms in nsp_atoms .items ():
415
338
for atm in connected_atoms :
416
339
if (atm in atomsToRemove ) and (idx not in atomsToRemove ):
417
- nitrogen = mol .GetAtomWithIdx (idx )
418
- print (f'Setting number of attached hydrogen to be 1 on atom { idx } ' )
419
- nitrogen .SetNumExplicitHs (1 )
340
+ if idx not in nsp_bond .keys ():
341
+ nsp_bond [idx ] = 1
342
+ else :
343
+ nsp_bond [idx ] += 1
420
344
345
+ # Set correct number of hydrogen on NSP atoms
346
+ for idx , nbond in nsp_bond .items ():
347
+ natom = mol .GetAtomWithIdx (idx )
348
+ print (f'Setting number of attached hydrogen to be { nbond } on atom { idx } ' )
349
+ natom .SetNumExplicitHs (nbond )
350
+
421
351
emol = Chem .EditableMol (mol )
422
352
atomsToRemove .sort (reverse = True )
423
353
for atom in atomsToRemove :
424
354
emol .RemoveAtom (atom )
425
-
355
+
426
356
mol2 = emol .GetMol ()
427
357
Chem .SanitizeMol (mol2 )
428
358
mol2h = Chem .AddHs (mol2 ,addCoords = True )
0 commit comments