Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit aff1bdd

Browse files
authoredAug 9, 2024··
Merge pull request #51 from gph82/select_by_GRN
New features: * sites get a new "pairs"-scheme: "consensus" * plots get a new "sort_by" option: "consensus" * minor fixes in error reporting and handling
2 parents 1f17f47 + 739074e commit aff1bdd

File tree

16 files changed

+287
-99
lines changed

16 files changed

+287
-99
lines changed
 

‎mdciao/cli/cli.py

Lines changed: 56 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ def _manage_timedep_ploting_and_saving_options(ctc_grp,
384384
fname = _path.join(fn.output_dir, iname)
385385
ifig.axes[0].set_title("%s" % title) # TODO consider firstname lastname
386386
ifig.savefig(fname, bbox_inches="tight", dpi=fn.graphic_dpi)
387-
_plt.close(ifig)
387+
#_plt.close(ifig)
388388
print(fname)
389389

390390
# even if no figures were produced or saved, we can still save the trajs
@@ -1692,18 +1692,45 @@ def sites(site_inputs,
16921692
16931693
Parameters
16941694
----------
1695-
site_inputs : dict or list of dicts
1695+
site_inputs : dict or path to file, or list thereof
16961696
Site(s) to compute. A site can be either
16971697
a path to a site file in json format or
16981698
directly a site dictionary. A site dictionary
16991699
is something like
17001700
1701-
>>> {"name":"site",
1702-
>>> "pairs":{"AAresSeq":["GLU30-ARG40",
1703-
>>> "LYS31-W70"]}}
1701+
>>> {"name": "interesting contacts",
1702+
>>> "pairs": {"AAresSeq": ["L394-K270",
1703+
>>> "D381-Q229"]}}
17041704
17051705
Any site containing a residue that can't be
17061706
found in the topology will be discarded.
1707+
The list in "pairs" can be specified as:
1708+
* 'AAresSeq':
1709+
>>> {"name": "interesting contacts",
1710+
>>> "pairs": {"AAresSeq": ["L394-K270",
1711+
>>> "D381-Q229"]}}
1712+
The 'AAresSeq' definitions are transferable to
1713+
another system where the same aminoacids are
1714+
present, regardless of their actual zero-indexing.
1715+
* 'residx':
1716+
>>> {"name": "interesting contacts",
1717+
>>> "pairs": {"residx":[[353,972],
1718+
>>> [340,956]]}}
1719+
The 'pairs' definitions are only transferable
1720+
across systems as long both systems share the same
1721+
zero-indexing scheme.
1722+
* 'consensus'
1723+
>>> {"name": "interesting contacts",
1724+
>>> "pairs": {"consensus": ["G.H5.26-6.32x32",
1725+
>>> "G.H5.13-5.68x68"]}}
1726+
The 'consensus' definitions are transferable to
1727+
another system, even if the selected aminoacids are
1728+
different. Please note, in order
1729+
to use 'consenus' definitions, you need
1730+
to pass at least one (or more) of the `GPCR_UniProt`,
1731+
`CGN_UniProt` or `KLIFS_string` arguments, else
1732+
there is no way to know to which residues the labels
1733+
belong to.
17071734
See :obj:`mdciao.sites` for more info on
17081735
the site format.
17091736
trajectories : str, :obj:`mdtraj.Trajectory` or lists thereof
@@ -1758,28 +1785,28 @@ def sites(site_inputs,
17581785
(allows for object re-use when in API mode)
17591786
See :obj:`mdciao.nomenclature` for more info and references.
17601787
KLIFS_string : str or :obj:`mdciao.nomenclature.LabelerKLIFS`, default is None
1761-
String for kinase KLIFS nomenclature. First, try to locate a local
1762-
file that directly has the `KLIFS_string` as a name. If that fails, then
1763-
combine the filename-format expected by :obj:`mdciao.nomenclature.LabelerKLIFS`
1764-
with `KLIFS_string` to construct a filename and check again.
1765-
If that doesn't work, then go online to contact the KLIFS database.
1766-
1767-
For the online lookup in the KLIFS database, the string
1768-
has to be formatted "key:value", which ultimately leads to a given KLIFS entry.
1769-
Acceptable keys and values for `KLIFS_string` are:
1770-
* "UniProtAC", e.g. "UniProtAC:P31751"
1771-
* "kinase_ID", e.g. "kinase_ID:2"
1772-
* "structure_ID", e.g. "structure_ID:1904", e.g. "P31751",
1773-
Please check the documentation on :obj:`mdciao.nomenclature.LabelerKLIFS`
1774-
for a more elaborate explanation on when to pick one of these key:value
1775-
pairs.
1776-
1777-
Finally, if `KLIFS_string` is an :obj:`mdciao.nomenclature.LabelerKLIFS`,
1778-
use this object directly (allows for object re-use when in API mode).
1779-
See :obj:`mdciao.nomenclature` for more info and references. Alos, please note
1780-
the difference between UniProt Accession Code
1781-
and UniProt entry name as explained
1782-
`here <https://www.uniprot.org/help/difference%5Faccession%5Fentryname>`_ .
1788+
String for kinase KLIFS nomenclature. First, try to locate a local
1789+
file that directly has the `KLIFS_string` as a name. If that fails, then
1790+
combine the filename-format expected by :obj:`mdciao.nomenclature.LabelerKLIFS`
1791+
with `KLIFS_string` to construct a filename and check again.
1792+
If that doesn't work, then go online to contact the KLIFS database.
1793+
1794+
For the online lookup in the KLIFS database, the string
1795+
has to be formatted "key:value", which ultimately leads to a given KLIFS entry.
1796+
Acceptable keys and values for `KLIFS_string` are:
1797+
* "UniProtAC", e.g. "UniProtAC:P31751"
1798+
* "kinase_ID", e.g. "kinase_ID:2"
1799+
* "structure_ID", e.g. "structure_ID:1904", e.g. "P31751",
1800+
Please check the documentation on :obj:`mdciao.nomenclature.LabelerKLIFS`
1801+
for a more elaborate explanation on when to pick one of these key:value
1802+
pairs.
1803+
1804+
Finally, if `KLIFS_string` is an :obj:`mdciao.nomenclature.LabelerKLIFS`,
1805+
use this object directly (allows for object re-use when in API mode).
1806+
See :obj:`mdciao.nomenclature` for more info and references. Alos, please note
1807+
the difference between UniProt Accession Code
1808+
and UniProt entry name as explained
1809+
`here <https://www.uniprot.org/help/difference%5Faccession%5Fentryname>`_ .
17831810
fragments : str, list, None, default is "lig_resSeq+"
17841811
Topology fragments. There exist several input modes:
17851812
@@ -1918,6 +1945,7 @@ def sites(site_inputs,
19181945
ctc_idxs_small, site_maps = _mdcsites.sites_to_res_pairs(sites, refgeom.top,
19191946
fragments=fragments_as_residue_idxs,
19201947
default_fragment_index=default_fragment_index,
1948+
consensus_maps=[consensus_maps if len(consensus_maps)>0 else None][0],
19211949
fragment_names=fragment_names)
19221950
if None in ctc_idxs_small:
19231951
print("Some definitions of the 'site_inputs' contain one or more residues not found in the input topology.")
@@ -2186,7 +2214,7 @@ def _res_resolver(res_range, top, fragments, midstring=None, GPCR_UniProt=None,
21862214

21872215
res_idxs_list = _mdcu.residue_and_atom.rangeexpand_residues2residxs(res_range, fragments, top,
21882216
pick_this_fragment_by_default=None,
2189-
additional_resnaming_dicts=consensus_maps,
2217+
additional_resnaming_dicts=[consensus_maps if len(consensus_maps)>0 else None][0],
21902218
**rangeexpand_residues2residxs_kwargs,
21912219
)
21922220

‎mdciao/contacts/contacts.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,9 +1953,13 @@ def label_flex(self, AA_format="short", split_label=True, defrag=None, fmt1="%-1
19531953
Parameters
19541954
----------
19551955
AA_format : str, default is "short"
1956-
Amino-acid format for the label, can
1957-
be "short" (A35@4.50), "long" (ALA35@4.50),
1958-
or "just_consensus" (4.50)
1956+
Amino-acid format for the label, can be
1957+
* short: A35@4.55
1958+
* "long": ALA35@4.50
1959+
* "just_consensus": 4.50
1960+
* "try_consensus": 4.50 if consensus labeling is present,
1961+
else default to "short"
1962+
19591963
split_label : bool, default is True
19601964
Split the labels so that stacked contact labels
19611965
become easier-to-read in plain ascii formats
@@ -1966,10 +1970,10 @@ def label_flex(self, AA_format="short", split_label=True, defrag=None, fmt1="%-1
19661970
contact label. Default is to leave
19671971
them as is, e.g. would be "@"
19681972
fmt1 : str, default is "%-15s"
1969-
Specify how the labels of res1 should formatted.
1973+
Specify how the labels of res1 should be formatted.
19701974
Only has effect if `split_label` is True
19711975
fmt2 : str, default is "%-15s"
1972-
Specify how the labels of res2 should formatted.
1976+
Specify how the labels of res2 should be formatted.
19731977
Only has effect if `split_label` is True
19741978
Returns
19751979
-------
@@ -1980,13 +1984,25 @@ def label_flex(self, AA_format="short", split_label=True, defrag=None, fmt1="%-1
19801984
label = self.labels.w_fragments_short_AA
19811985
elif AA_format== 'long':
19821986
label = self.labels.w_fragments
1983-
elif AA_format== 'just_consensus':
1987+
elif AA_format.endswith('_consensus'):
19841988
#TODO where do we put this assertion?
19851989
if None in self._attribute_residues.consensus_labels:
1986-
raise ValueError("Residues %s don't have both consensus labels:%s" % (
1987-
self._attribute_residues.names_short,
1988-
self._attribute_residues.consensus_labels))
1989-
label = self.labels.just_consensus
1990+
if AA_format.startswith("just_"):
1991+
raise ValueError("Residues %s don't have both consensus labels:%s. \n Try setting `AA_format='try_consensus'`" % (
1992+
self._attribute_residues.names_short,
1993+
self._attribute_residues.consensus_labels))
1994+
elif AA_format.startswith("try_"):
1995+
cands = _mdcu.str_and_dict.splitlabel(self.labels.w_fragments_short_AA)
1996+
label=[]
1997+
for ii, lab in enumerate(self._attribute_residues.consensus_labels):
1998+
if lab is None:
1999+
label.append(cands[ii])
2000+
else:
2001+
label.append(lab)
2002+
label="-".join(label)
2003+
else:
2004+
label = self.labels.just_consensus
2005+
19902006
else:
19912007
raise ValueError(AA_format)
19922008
if defrag is not None:

‎mdciao/filenames/filenames.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ def __init__(self):
101101
self.tip_json = _path.join(self.json_path,"tip.json")
102102
self.tip_dat= _path.join(self.json_path,"tip.dat")
103103
self.tip_residx_dat= _path.join(self.json_path,"tip_residx.dat")
104+
self.tip_consensus_dat = _path.join(self.json_path,"tip_consensus.dat")
105+
self.tip_consensus_json = _path.join(self.json_path,"tip_consensus.json")
106+
104107

105108
#zip
106109
self.zipfile_two_empties = _path.join(self.example_path,"two_empty_files.zip")

‎mdciao/fragments/fragments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ def check_if_fragment_clashes(sub_frag, fragname, fragments, top,
821821
# Get the fragment idxs of all residues in this fragment
822822
ifrags = [_mdcu.lists.in_what_fragment(idx, fragments) for idx in sub_frag]
823823

824-
frag_cands = [ifrag for ifrag in _pandas_unique(ifrags) if ifrag is not None]
824+
frag_cands = [ifrag for ifrag in _pandas_unique(_np.array(ifrags)) if ifrag is not None]
825825
if prompt:
826826
was_subfragment = len(frag_cands) <= 1
827827
if not was_subfragment:

‎mdciao/nomenclature/nomenclature.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2305,7 +2305,7 @@ def _map2defs(cons_list, splitchar="."):
23052305
def _sort_consensus_labels(subset, sorted_superset,
23062306
append_diffset=True):
23072307
r"""
2308-
Sort consensus labels (GPCR or CGN)
2308+
Sort consensus labels (GPCR, CGN, KLIFS)
23092309
23102310
Parameters
23112311
----------

‎mdciao/plots/plots.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@
3838

3939
import mdciao.utils as _mdcu
4040

41+
from mdciao.nomenclature.nomenclature import _sort_all_consensus_labels
42+
# The above line introduces a dependency of 'plots' on 'nomenclature', which were
43+
# uncoupled so far. The alternative would be to put '_sort_all_consensus_labels'
44+
# into 'utils.str_and_dict' (since it's essentially string operations).
45+
# However, as plotting methods become increasing nomenclature-aware, such a
46+
# plots -> nomenclature dependency will likely come in the future
47+
4148
from os import path as _path
4249

4350
from collections import defaultdict as _defdict
@@ -48,7 +55,7 @@
4855

4956
from pandas import DataFrame as _DataFrame
5057

51-
_metric_types_for_sorting = frozenset(["mean", "std", "numeric", "residue", "keep"])
58+
_schemes_for_sorting = frozenset(["mean", "std", "numeric", "residue", "keep", "consensus"])
5259

5360
def plot_w_smoothing_auto(y, ax=None, label=None, color=None, x=None, background=True, n_smooth_hw=0, ls="-"):
5461
r"""
@@ -290,6 +297,7 @@ def _pop_keys_by_scheme(sort_by, freqs_by_sys_by_ctc, mean_std_by_ctc,
290297
drop_below["numeric"] = drop_below["mean"]
291298
drop_below["residue"] = drop_below["mean"]
292299
drop_below["list"] = drop_below["mean"]
300+
drop_below["consensus"] = drop_below["mean"]
293301

294302
drop_above = lambda ctc: all([idict[ctc] >= identity_cutoff for idict in freqs_by_sys_by_ctc.values()]) \
295303
and remove_identities
@@ -344,7 +352,7 @@ def _sorting_schemes(freqs_by_sys_by_ctc, sort_by='mean',
344352
assert len(all_ctc_keys) == len(list(freqs_by_sys_by_ctc[sk].keys())), ValueError("This is not a unified dictionary")
345353

346354
# 0. Compute means and stds for everybody
347-
dict_for_sorting = {key: {key : None for key in all_ctc_keys} for key in list(_metric_types_for_sorting)+["list"]}
355+
dict_for_sorting = {key: {key : None for key in all_ctc_keys} for key in list(_schemes_for_sorting) + ["list"]}
348356
for key in all_ctc_keys:
349357
dict_for_sorting["std"][key] = _np.std([idict[key] for idict in freqs_by_sys_by_ctc.values()])
350358
dict_for_sorting["mean"][key] = _np.mean([idict[key] for idict in freqs_by_sys_by_ctc.values()])
@@ -358,7 +366,7 @@ def _sorting_schemes(freqs_by_sys_by_ctc, sort_by='mean',
358366
kept_keys = [key for key in sort_by if key in all_ctc_keys] #setops don't conserve order
359367
excluded_ctc_keys = [key for key in all_ctc_keys if key not in kept_keys] #setops don't conserve order
360368
sort_by = "list"
361-
elif sort_by in _metric_types_for_sorting:
369+
elif sort_by in _schemes_for_sorting:
362370

363371
# Then sort, in case sort_by wasn't a list but an actual scheme (has its own method)
364372
kept_keys = _sorter_by_key_or_val(sort_by, dict_for_sorting[sort_by])
@@ -367,7 +375,7 @@ def _sorting_schemes(freqs_by_sys_by_ctc, sort_by='mean',
367375

368376
excluded_ctc_keys = []
369377
else:
370-
raise ValueError(f"Argument 'sort_by' has to be one of {list(_metric_types_for_sorting)}, but not '{sort_by}'")
378+
raise ValueError(f"Argument 'sort_by' has to be one of {list(_schemes_for_sorting)}, but not '{sort_by}'")
371379

372380
freqs_by_sys_by_ctc = {skey : {key : sval[key] for key in kept_keys} for skey, sval in freqs_by_sys_by_ctc.items()}
373381

@@ -1757,6 +1765,8 @@ def _sorter_by_key_or_val(sort_by, indict):
17571765
# In[5]: natsorted(["0-20", "0-10", "ALA30-GLU50", "ALA30-GLU40", "ALA", "GLU5-ALA20"])
17581766
# Out[5]: ['0-10', '0-20', 'ALA', 'ALA30-GLU40', 'ALA30-GLU50', 'GLU5-ALA20']
17591767
# -> we would want ['0-10', '0-20', 'GLU5-ALA20', 'ALA30-GLU40', 'ALA30-GLU50', 'ALA']
1768+
elif sort_by == "consensus":
1769+
ordered_keys = _sort_all_consensus_labels(all_ctc_keys)
17601770
elif sort_by in ["mean", "std"]:
17611771
ordered_keys = list(_mdcu.str_and_dict.sort_dict_by_asc_values(indict).keys())
17621772
elif sort_by == "keep":

0 commit comments

Comments
 (0)
Please sign in to comment.