1
+ from pymol import cmd
2
+ from io import StringIO
3
+
4
+ try :
5
+ from Bio .SeqUtils .ProtParam import ProteinAnalysis
6
+ from Bio import SeqIO
7
+ from Bio .Seq import Seq
8
+ except ModuleNotFoundError :
9
+ # Note that Bio package might be missing from Pymol 2 installation!
10
+ print ("Oops! Protparam: Biopython is missing!\n If you want to install it, run protparam_dependencies_install command" )
11
+
12
+
13
@cmd.extend
def protparam(selection='enabled', bychain=0):
    '''
DESCRIPTION:
    Given selection, calculates common protein properties, like Mw, pI, length and aminoacid content.
    By default, combines all chains of each object into the single sequence.

USAGE:
    protparam selection, [bychain]

ARGUMENTS:
    selection = string: atom selection to analyse {default: enabled}
    bychain = 0/1: if 1, report every FASTA record (chain) separately
              instead of one combined sequence per object {default: 0}

DEPENDENCIES:
    biopython
    '''
    # Arguments typed on the PyMOL command line arrive as strings; the string
    # "0" is truthy, so convert before using the value as a flag.
    bychain = int(bychain)
    #TODO: add pretty output suitable for copy-pasting
    for entry in cmd.get_object_list(selection):
        # Restrict the selection to the current object and read it as FASTA text.
        fasta_text = cmd.get_fastastr(f"({selection}) and {entry}")
        sequences = [record.seq for record in SeqIO.parse(StringIO(fasta_text), "fasta")]
        if not bychain:
            # by default combine all chains into single sequence
            sequences = [Seq('').join(sequences)]
        for sequence in sequences:
            # '?' characters are removed before the analysis.
            sequence = str(sequence).replace('?', '').strip()
            if not sequence:
                # Nothing left to analyse; skip instead of reporting a
                # zero-length protein.
                continue
            analysis = ProteinAnalysis(sequence)
            # Dict is useful when only specific residues should be reported
            counts_aa = analysis.count_amino_acids()
            print(f"Protein name: {entry}")
            print(f"Sequence: {sequence}")
            print(f"\nProtein length: {analysis.length} aa")
            print(f"Molecular Weight: {analysis.molecular_weight():.1f} Da")
            print(f"Isoelectric point: {analysis.isoelectric_point():.2f}")
            print(f"Count of aminoacids: {counts_aa}\n\n")
45
+
46
@cmd.extend
def protparam_dependencies_install():
    '''
DESCRIPTION:
    Installs biopython by running "pip install biopython" with the
    interpreter given by sys.executable, then reports success or failure.

USAGE:
    protparam_dependencies_install
    '''
    import subprocess
    import sys

    pip_command = [sys.executable, "-m", "pip", "install", "biopython"]
    try:
        # check_call raises CalledProcessError on a non-zero exit status.
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as error:
        print(f"Failed to install biopython: {error}")
    else:
        print("Successfully installed biopython! Reload Protparam plugin or restart PyMOL.")
55
+
56
def test_protparam(capsys):
    """Check the report printed by protparam for two peptides built with cmd.fab.

    Object m1 is built from "A// ACD B// EFG" and m2 from "HIKL"; CYS residues
    are renamed to UNK, and the expected sequences below no longer contain
    them. Covers the combined mode, the per-chain mode, a residue-level
    selection and a selection that matches nothing.
    """

    def expect(present=(), absent=()):
        # Drain the output captured since the previous call and check it.
        out = capsys.readouterr().out
        for fragment in present:
            assert fragment in out
        for fragment in absent:
            assert fragment not in out

    cmd.reinitialize()
    cmd.fab("A// ACD B// EFG", "m1")
    cmd.fab("HIKL", "m2")
    cmd.alter("resn CYS", "resn='UNK'")

    # Default mode: one combined sequence per object.
    protparam()
    expect(
        present=(
            "Protein name: m1",
            "Protein name: m2",
            "Sequence: ADEFG\n",
            "Sequence: HIKL\n",
            "Protein length: 4 aa",
            "Protein length: 5 aa",
            "Count of aminoacids: {'A': 1,",
        ),
        absent=(
            "Protein length: 2 aa",
            "Protein length: 3 aa",
        ),
    )

    # Per-chain mode: the chains of m1 are reported separately.
    protparam(bychain=1)
    expect(
        present=(
            "Protein name: m1",
            "Protein name: m2",
            "Sequence: AD\n",
            "Sequence: EFG\n",
            "Protein length: 2 aa",
            "Protein length: 3 aa",
            "Protein length: 4 aa",
            "Molecular Weight: 204.2 Da",
        ),
        absent=(
            "Protein length: 5 aa",
        ),
    )

    # Residue-level selection: only m2 is reported.
    protparam("resn LYS")
    expect(
        present=(
            "Protein name: m2",
            "Protein length: 1 aa",
            "Isoelectric point: 8.75",
        ),
        absent=(
            "Protein name: m1",
        ),
    )

    # A selection matching nothing must print nothing at all.
    protparam("resn TRP")
    silent = capsys.readouterr()
    assert silent.out == ""
    assert silent.err == ""
0 commit comments