-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdb_addH.py
executable file
·439 lines (344 loc) · 14.6 KB
/
pdb_addH.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
#!/usr/bin/env python
# Copyright 2007, Michael J. Harms
# This program is distributed under General Public License v. 3. See the file
# COPYING for a copy of the license.
__description__ = "Adds polar hydrogens to a pdb file for a UHBD calculation."
__author__ = "Michael J. Harms"
__date__ = "070727"
from helper import container, cmdline, geometry
from math import sqrt
from pdb_atom_renumber import pdbAtomRenumber
from pdb_disulfide import pdbDisulfide
import time, os
import charmm.interface
class PdbAddHError(Exception):
"""
General error class for this module.
"""
pass
def renameResidue(pdb,residue_name,residue_number):
"""
Renames residue defined by residue number to residue_name.
"""
residue = [l for l in pdb if l[21:26] == residue_number]
index = pdb.index(residue[0])
for i, r in enumerate(residue):
pdb[index + i] = "%s%-4s%s" % (r[:17],residue_name,r[21:])
return pdb
def convertHis(pdb,his_types=None):
"""
Rename HIS residues to HSD or HIS (1 or 2 tautomers). If his_types is
specified, use numbers in there. Otherwise, set all to 2 tautomer.
"""
# Find histidines
all_his = ["HIS ","HSD ","HSE ","HISA","HISB"]
his_dict = {1:"HSD ",2:"HIS ",3:"HSC "}
# Change his types
HIS_lines = [l for l in pdb if l[17:21] in all_his and l[13:16] == "N "]
if his_types == None:
his_types = [2 for l in HIS_lines]
# Create final list of hist types
his_types = [his_dict[k] for k in his_types]
# Change his in file to proper his types
for index, HIS_line in enumerate(HIS_lines):
residue_number = HIS_line[21:26]
pdb = renameResidue(pdb,his_types[index],residue_number)
return pdb
def convertResidues(pdb,atom_conv={},resid_conv={},atom_skip=[],resid_skip=[]):
"""
Convert a pdb file to a CHARMM readable input (i.e. set HIS tautomeric
states and give every group a charge.
"""
atom_to_convert = atom_conv.keys()
res_to_convert = resid_conv.keys()
new_pdb = []
for line in pdb:
atom = line[12:16]
if atom in atom_skip:
continue
elif atom in atom_to_convert:
line = "%s%-4s%s" % (line[:12],atom_conv[atom],line[16:])
res = line[17:21]
if atom in atom_skip:
continue
elif res in res_to_convert:
line = "%s%-4s%s" % (line[:17],resid_conv[res],line[21:])
new_pdb.append(line)
return new_pdb
def processCysteines(pdb):
"""
Find disulfide bonds. Add hydrogens to free cysteines. Change name of
disulfide bonded residues to CSS.
"""
# Find disulfide bonds
free_cys, disulfide = pdbDisulfide(pdb)
# Add hydrogens to free cysteines
for cys in free_cys:
residue = [l for l in pdb if l[21:26] == cys]
CB_line = [l for l in residue if l[13:16] == "CB "][0]
SG_line = [l for l in residue if l[13:16] == "SG "][0]
CB_coord = [float(CB_line[30+8*i:38+8*i]) for i in range(3)]
SG_coord = [float(SG_line[30+8*i:38+8*i]) for i in range(3)]
HG_coord = geometry.calcHG(CB_coord,SG_coord)
HG_line = "ATOM %5i %-3s %-4s%5s %8.3F%8.3F%8.3F%23s%s \n" % \
(1,"HG","CYS",cys,HG_coord[0],HG_coord[1],HG_coord[2]," ","H")
index = pdb.index(SG_line)
pdb.insert(index+1,HG_line)
# Rename CYS in disulfide bonds to CSS
for cys in disulfide:
pdb = renameResidue(pdb,"CSS",cys)
return pdb
def processTerm(pdb):
"""
Process termini. Change name of N-terminal residues to RESN, name of
C-terminal residues to RESC, and add C-terminal carboxyl hydrogen.
"""
# Change name of N-terminal residue
HT1_lines = [l for l in pdb if l[13:16] == "HT1"]
for HT1_line in HT1_lines:
# Change name of N terminal residue to RESN (i.e. ALAN, ARGN, etc.)
residue_name = HT1_line[16:20].strip()
residue_name = "%4s" % (residue_name + "N")
residue_number = HT1_line[21:26]
pdb = renameResidue(pdb,residue_name,residue_number)
# Add C-terminal hydrogens
OXT_lines = [l for l in pdb if l[13:16] == "OXT"]
for OXT_line in OXT_lines:
# Grab residues in C-terminus
last_residue = [l for l in pdb if l[21:26] == OXT_line[21:26]]
C_line = [l for l in last_residue if l[13:16] == "C "][0]
O_line = [l for l in last_residue if l[13:16] == "O "][0]
# Calculate position of HXT
OXT_coord = [float(OXT_line[30+8*i:38+8*i]) for i in range(3)]
C_coord = [float(C_line[30+8*i:38+8*i]) for i in range(3)]
O_coord = [float(O_line[30+8*i:38+8*i]) for i in range(3)]
HXT_coord = geometry.calcHXT(C_coord,O_coord,OXT_coord)
# Convert position of HXT to a pdb line
HXT_line = "ATOM %5i %-3s %9s %8.3F%8.3F%8.3F%23s%s \n" % \
(1,"HXT",OXT_line[17:26],
HXT_coord[0],HXT_coord[1],HXT_coord[2]," ","H")
# Insert HXT
index = pdb.index(OXT_line)
pdb.insert(index+1,HXT_line)
# Change name of C terminal residue to RESC (i.e. ALAC, ARGC, etc.)
residue_name = OXT_line[16:20].strip()
residue_name = "%4s" % (residue_name + "C")
residue_number = OXT_line[21:26]
pdb = renameResidue(pdb,residue_name,residue_number)
return pdb
def flipAtoms(pdb):
"""
Flip the OX1/OX2 labels of GLU and ASP residues.
"""
flip = {"ASP":("OD1","OD2"),
"GLU":("OE1","OE2")}
flip_keys = flip.keys()
for index, line in enumerate(pdb):
res = line[17:30]
if res in flip_keys and line[13:16] in flip[res]:
new_atom = flip[res].index(line[13:16]) - 1
pdb[index] = "%s%s%s" % (line[0:13],new_atom,line[16:])
return pdb
def pdbAddH(pdb,pdb_id,uhbd_style=False,his_types=None,calc_type="single",
keep_temp=False,hbond=False):
"""
Add polar hydrogens to the structure using CHARMM for a UHBD calculation.
"""
# Residues to alter and skip during processing
if calc_type == "single":
pdb2charmm_resid = {"LYS ":"LYSN","ARG ":"ARGN","GLU ":"GLUH",
"ASP ":"ASPH","LYSH":"LYSN","LSN ":"LYSN"}
charmm2pdb_resid = {"LYSN":"LYS ","ARGN":"ARG ","GLUH":"GLU ",
"ASPH":"ASP ","HIS ":"HISA","HSD ":"HISB"}
elif calc_type == "full":
pdb2charmm_resid = {"GLU ":"GLUH","ASP ":"ASPH","LYSH":"LYS ",
"LSN ":"LYS "}
charmm2pdb_resid = {"GLUH":"GLU ","ASPH":"ASP ","HIS ":"HISA",
"HSD ":"HISB","HSC ":"HISA"}
else:
err = "Calculation type \"%s\" not recognized!" % calc_type
raise PdbAddHError(err)
charmm2pdb_atom_skip = [" HT3"]
all_his = ["HIS ","HSD ","HSE ","HISA","HISB"]
# Grab sequence and atoms from pdb file
seq_lines = [l for l in pdb if l[0:6] == "SEQRES"]
atom_lines = [l for l in pdb if l[0:6] == "ATOM "]
# Create a pdb object that will find termini and renumber all atoms. The
# renumbering scheme is dumped to pdb_id_resid-conversion.txt
pdb_obj = container.Structure(pdb_id,seq_lines,atom_lines)
pdb_obj.renumberAtoms()
pdb_obj.dumpNumberConversion("%s_resid-conversion.txt" % pdb_id)
structure_list = pdb_obj.dumpStructures()
# Convert residue names in structure
for index, struct in enumerate(structure_list):
tmp_struct = convertResidues(struct[0],resid_conv=pdb2charmm_resid)
structure_list[index][0] = tmp_struct
# Convert histidines to correct type (speificied in tautomer file). If
# not tautomer file is specified, default HIS is passed to charmm
if calc_type == "single":
his_list = his_types
for index, struct in enumerate(structure_list):
if his_types != None:
num_his = len([l for l in struct[0] if l[17:21] in all_his])
try:
his = his_list[:num_his]
his_list = his_list[num_his:]
except IndexError:
err = "Number of HIS in pdb and tautomer file do not match!"
raise cmdline.parser.error(err)
else:
his = None
tmp_struct = convertHis(struct[0][:],his)
structure_list[index][0] = tmp_struct
# Make sure that all his where used
if his_types != None and len(his_list) != 0:
raise cmdline.parser.error(err)
# For full calculation, convert all histidines to charged form (HSC)
elif calc_type == "full":
for index, struct in enumerate(structure_list):
num_his = len([l for l in struct[0] if l[17:21] in all_his])
his = [3 for i in range(num_his)]
tmp_struct = convertHis(struct[0][:],his)
structure_list[index][0] = tmp_struct
# Flip carboxyl atoms
if calc_type == "full":
for index, struct in enumerate(structure_list):
structure_list[index][0] = flipAtoms(struct[0])
# User CHARMM to add hydrogens
try:
out_pdb = charmm.interface.charmmWash(structure_list,calc_type,
keep_temp,hbond)
except charmm.interface.CharmmInterfaceError, (strerr):
err = "Error in charmm!\n"
err += "%s\n" % strerr
raise PdbAddHError(err)
# Deal with addH specific changes in cysteines, termini, and residue names
out_pdb = processCysteines(out_pdb)
out_pdb = convertResidues(out_pdb,resid_conv=charmm2pdb_resid,
atom_skip=charmm2pdb_atom_skip)
out_pdb = processTerm(out_pdb)
new_pdb = container.Structure("tmp",[],out_pdb)
new_pdb.loadNumberConversion("%s_resid-conversion.txt" % pdb_id,"fixed")
new_pdb.renumberAtoms()
out = []
for chain in new_pdb.chains:
out.extend(chain.atom_lines)
ter = out[-1]
ter = "%s%s%54s\n" % ("TER ",ter[6:26]," ")
out.append(ter)
out.append("%-80s\n" % "END")
if uhbd_style:
out = [l for l in out if l[0:3] != "TER"]
# UHBD takes a non-standard pdb file; atom names must be left-justified.
out = ["%s%-4s%s" % (l[:12],l[12:16].strip(),l[16:]) for l in out]
# UHDB also cannot handle chain identifiers, remove them
out = ["%s %s" % (l[0:21],l[22:]) for l in out]
# Add header and END
out.insert(0,"%-79s\n" % "REMARK Polar hydrogens added by pdb_addH.py")
out.insert(1,"%-79s\n" % ("REMARK Time: %s" % time.asctime()))
return out
def main():
"""
Call if program called from command line.
"""
# Parse command line
cmdline.initializeParser(__description__,__date__)
cmdline.addOption(short_flag="t",
long_flag="his_tautomers",
action="store",
default=None,
help="File containing his tautomers to use",
nargs=1)
cmdline.addOption(short_flag="k",
long_flag="keep_temp",
action="store_true",
default=False,
help="Keep temporary files")
cmdline.addOption(short_flag="f",
long_flag="full",
action="store_true",
default=False,
help="Add hydrogens for UHBD full calculation")
cmdline.addOption(short_flag="b",
long_flag="hbond",
action="store",
default=None,
help="Write out hbonds to file",
nargs=1,
type=str)
cmdline.addOption(short_flag="u",
long_flag="uhbd_style",
action="store_true",
default=False,
help="Write out in non-standard uhbd format")
cmdline.addOption(short_flag="s",
long_flag="skip",
action="store_true",
default=True,
help="skip messed up pdb files")
file_list, options = cmdline.parseCommandLine()
# Deal with his_tautomers file
if options.his_tautomers != None:
# Read in as a standard ascii file.
his_types = cmdline.readFile(options.his_tautomers)
try:
his_types = [int(h) for h in his_types]
# Make sure that entries are valid
check = [h == 1 or h == 2 for h in his_types]
if False in check:
raise ValueError
except ValueError:
err = "His tautomer file can contain only 1's (HISB) and 2's (HISA)"
cmdline.parser.error(err)
else:
his_types = None
# Decide whether to keep temp files and how to format output
keep_temp = options.keep_temp
uhbd_style = options.uhbd_style
hbond = options.hbond
# Decide whether to add "full" hydrogens.
if options.full:
calc_type = "full"
else:
calc_type = "single"
# Add hydrogens for every file in file_list
file_list.sort()
for file in file_list:
pdb_id = os.path.split(file)[-1][:-4]
out_file = "%sH.pdb" % pdb_id
print "%s --> %s" % (file,out_file)
# Load in file
f = open(file,'r')
pdb = f.readlines()
f.close()
# Add hydrogens
try:
pdb_out = pdbAddH(pdb,pdb_id,uhbd_style=uhbd_style,
his_types=his_types,
keep_temp=keep_temp,
calc_type=calc_type,
hbond=hbond)
except PdbAddHError, (strerror):
err = "Addition of hydrogens failed for %s\n" % file
err += "Problem was with CHARMM\n.%s" % strerror
print err
if options.skip:
print "CHARMM output written to error.log"
g = open("error.log","a")
g.write(err)
g.write("charmm.out copied below\n%s\n" % (79*"-"))
h = open("charmm.out",'r')
charmm_out = h.readlines()
h.close()
g.writelines(charmm_out)
g.write("\n\n")
g.close()
continue
else:
sys.exit()
# Dump to output file
g = open(out_file,'w')
g.writelines(pdb_out)
g.close()
if __name__ == "__main__":
main()