1+ from pymol import cmd
2+ from io import StringIO
3+
4+ try :
5+ from Bio .SeqUtils .ProtParam import ProteinAnalysis
6+ from Bio import SeqIO
7+ from Bio .Seq import Seq
8+ except ModuleNotFoundError :
9+ # Note that Bio package might be missing from Pymol 2 installation!
10+ print ("Oops! Protparam: Biopython is missing!\n If you want to install it, run protparam_dependencies_install command" )
11+
12+
13+ @cmd .extend
14+ def protparam (selection = 'enabled' , bychain = 0 ):
15+ '''
16+ DESCRIPTION:
17+ Given selection, calculates common protein properties, like Mw, pI, length and aminoacid content.
18+ By default, combines all chains of each object into the single sequence.
19+
20+ USAGE:
21+ protparam selection, [bychain]
22+
23+ DEPENDENCIES:
24+ biopython
25+ '''
26+ #TODO: add pretty output suitable for copy-pasting
27+ for entry in cmd .get_object_list (selection ):
28+ sequence_obj = cmd .get_fastastr (f"({ selection } ) and { entry } " )
29+ fasta_io = StringIO (sequence_obj )
30+ sequences = list (SeqIO .parse (fasta_io , "fasta" ))
31+ sequences = [s .seq for s in sequences ]
32+ if not bychain :
33+ #by default combine all chains into single sequence
34+ sequences = [Seq ('' ).join (sequences )]
35+ for sequence in sequences :
36+ sequence = str (sequence ).replace ('?' ,'' ).strip ()
37+ analysis = ProteinAnalysis (sequence )
38+ counts_aa = analysis .count_amino_acids () #Dict is useful when only specific residues should be reported
39+ print (f"Protein name: { entry } " )
40+ print (f"Sequence: { sequence } " )
41+ print (f"\n Protein length: { analysis .length } aa" )
42+ print (f"Molecular Weight: { analysis .molecular_weight ():.1f} Da" )
43+ print (f"Isoelectric point: { analysis .isoelectric_point ():.2f} " )
44+ print (f"Count of aminoacids: { counts_aa } \n \n " )
45+
46+ @cmd .extend
47+ def protparam_dependencies_install ():
48+ import sys
49+ import subprocess
50+ try :
51+ subprocess .check_call ([sys .executable , "-m" , "pip" , "install" , 'biopython' ])
52+ print (f"Successfully installed biopython! Reload Protparam plugin or restart PyMOL." )
53+ except subprocess .CalledProcessError as e :
54+ print (f"Failed to install biopython: { e } " )
55+
56+ def test_protparam (capsys ):
57+ cmd .reinitialize ()
58+ cmd .fab ("A// ACD B// EFG" , "m1" )
59+ cmd .fab ("HIKL" , "m2" )
60+ cmd .alter ("resn CYS" , "resn='UNK'" )
61+ protparam ()
62+ captured = capsys .readouterr ()
63+ assert "Protein name: m1" in captured .out
64+ assert "Protein name: m2" in captured .out
65+ assert "Sequence: ADEFG\n " in captured .out
66+ assert "Sequence: HIKL\n " in captured .out
67+ assert "Protein length: 2 aa" not in captured .out
68+ assert "Protein length: 3 aa" not in captured .out
69+ assert "Protein length: 4 aa" in captured .out
70+ assert "Protein length: 5 aa" in captured .out
71+ assert "Count of aminoacids: {'A': 1," in captured .out
72+ protparam (bychain = 1 )
73+ captured = capsys .readouterr ()
74+ assert "Protein name: m1" in captured .out
75+ assert "Protein name: m2" in captured .out
76+ assert "Sequence: AD\n " in captured .out
77+ assert "Sequence: EFG\n " in captured .out
78+ assert "Protein length: 2 aa" in captured .out
79+ assert "Protein length: 3 aa" in captured .out
80+ assert "Protein length: 4 aa" in captured .out
81+ assert "Protein length: 5 aa" not in captured .out
82+ assert "Molecular Weight: 204.2 Da" in captured .out
83+ protparam ("resn LYS" )
84+ captured = capsys .readouterr ()
85+ assert "Protein name: m1" not in captured .out
86+ assert "Protein name: m2" in captured .out
87+ assert "Protein length: 1 aa" in captured .out
88+ assert "Isoelectric point: 8.75" in captured .out
89+ protparam ("resn TRP" )
90+ captured = capsys .readouterr ()
91+ assert captured .out == ""
92+ assert captured .err == ""
0 commit comments