Skip to content

Commit 1269bf0

Browse files
authored
Merge pull request #168 from e-osipov/master
Add Protparam
2 parents 6108ee4 + 6e4ec81 commit 1269bf0

File tree

2 files changed

+93
-0
lines changed

2 files changed

+93
-0
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ jobs:
1818
pyopengl
1919
pytest
2020
requests
21+
biopython
2122
2223
- name: Test
2324
run: python3 -m pytest -v

scripts/protparam.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from pymol import cmd
2+
from io import StringIO
3+
4+
try:
5+
from Bio.SeqUtils.ProtParam import ProteinAnalysis
6+
from Bio import SeqIO
7+
from Bio.Seq import Seq
8+
except ModuleNotFoundError:
9+
# Note that Bio package might be missing from Pymol 2 installation!
10+
print("Oops! Protparam: Biopython is missing!\n If you want to install it, run protparam_dependencies_install command")
11+
12+
13+
@cmd.extend
14+
def protparam(selection='enabled', bychain=0):
15+
'''
16+
DESCRIPTION:
17+
Given selection, calculates common protein properties, like Mw, pI, length and aminoacid content.
18+
By default, combines all chains of each object into the single sequence.
19+
20+
USAGE:
21+
protparam selection, [bychain]
22+
23+
DEPENDENCIES:
24+
biopython
25+
'''
26+
#TODO: add pretty output suitable for copy-pasting
27+
for entry in cmd.get_object_list(selection):
28+
sequence_obj = cmd.get_fastastr(f"({selection}) and {entry}")
29+
fasta_io = StringIO(sequence_obj)
30+
sequences = list(SeqIO.parse(fasta_io, "fasta"))
31+
sequences = [s.seq for s in sequences]
32+
if not bychain:
33+
#by default combine all chains into single sequence
34+
sequences = [Seq('').join(sequences)]
35+
for sequence in sequences:
36+
sequence = str(sequence).replace('?','').strip()
37+
analysis = ProteinAnalysis(sequence)
38+
counts_aa = analysis.count_amino_acids() #Dict is useful when only specific residues should be reported
39+
print(f"Protein name: {entry}")
40+
print(f"Sequence: {sequence}")
41+
print(f"\nProtein length: {analysis.length} aa")
42+
print(f"Molecular Weight: {analysis.molecular_weight():.1f} Da")
43+
print(f"Isoelectric point: {analysis.isoelectric_point():.2f}")
44+
print(f"Count of aminoacids: {counts_aa}\n\n")
45+
46+
@cmd.extend
47+
def protparam_dependencies_install():
48+
import sys
49+
import subprocess
50+
try:
51+
subprocess.check_call([sys.executable, "-m", "pip", "install", 'biopython'])
52+
print(f"Successfully installed biopython! Reload Protparam plugin or restart PyMOL.")
53+
except subprocess.CalledProcessError as e:
54+
print(f"Failed to install biopython: {e}")
55+
56+
def test_protparam(capsys):
57+
cmd.reinitialize()
58+
cmd.fab("A// ACD B// EFG", "m1")
59+
cmd.fab("HIKL", "m2")
60+
cmd.alter("resn CYS", "resn='UNK'")
61+
protparam()
62+
captured = capsys.readouterr()
63+
assert "Protein name: m1" in captured.out
64+
assert "Protein name: m2" in captured.out
65+
assert "Sequence: ADEFG\n" in captured.out
66+
assert "Sequence: HIKL\n" in captured.out
67+
assert "Protein length: 2 aa" not in captured.out
68+
assert "Protein length: 3 aa" not in captured.out
69+
assert "Protein length: 4 aa" in captured.out
70+
assert "Protein length: 5 aa" in captured.out
71+
assert "Count of aminoacids: {'A': 1," in captured.out
72+
protparam(bychain=1)
73+
captured = capsys.readouterr()
74+
assert "Protein name: m1" in captured.out
75+
assert "Protein name: m2" in captured.out
76+
assert "Sequence: AD\n" in captured.out
77+
assert "Sequence: EFG\n" in captured.out
78+
assert "Protein length: 2 aa" in captured.out
79+
assert "Protein length: 3 aa" in captured.out
80+
assert "Protein length: 4 aa" in captured.out
81+
assert "Protein length: 5 aa" not in captured.out
82+
assert "Molecular Weight: 204.2 Da" in captured.out
83+
protparam("resn LYS")
84+
captured = capsys.readouterr()
85+
assert "Protein name: m1" not in captured.out
86+
assert "Protein name: m2" in captured.out
87+
assert "Protein length: 1 aa" in captured.out
88+
assert "Isoelectric point: 8.75" in captured.out
89+
protparam("resn TRP")
90+
captured = capsys.readouterr()
91+
assert captured.out == ""
92+
assert captured.err == ""

0 commit comments

Comments
 (0)