gph82 · Aug 9, 2024
diff --git a/‎mdciao/cli/cli.py
Lines changed: 56 additions & 28 deletions b/‎mdciao/cli/cli.py
Lines changed: 56 additions & 28 deletions
diff --git a/‎mdciao/contacts/contacts.py
Lines changed: 26 additions & 10 deletions b/‎mdciao/contacts/contacts.py
Lines changed: 26 additions & 10 deletions
diff --git a/‎mdciao/filenames/filenames.py
Lines changed: 3 additions & 0 deletions b/‎mdciao/filenames/filenames.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎mdciao/fragments/fragments.py
Lines changed: 1 addition & 1 deletion b/‎mdciao/fragments/fragments.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎mdciao/nomenclature/nomenclature.py
Lines changed: 1 addition & 1 deletion b/‎mdciao/nomenclature/nomenclature.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎mdciao/plots/plots.py
Lines changed: 14 additions & 4 deletions b/‎mdciao/plots/plots.py
Lines changed: 14 additions & 4 deletions
@@ -384,7 +384,7 @@ def _manage_timedep_ploting_and_saving_options(ctc_grp,
             fname = _path.join(fn.output_dir, iname)
             ifig.axes[0].set_title("%s" % title) # TODO consider firstname lastname
             ifig.savefig(fname, bbox_inches="tight", dpi=fn.graphic_dpi)
-            _plt.close(ifig)
+            #_plt.close(ifig)
             print(fname)
 
     # even if no figures were produced or saved, we can still save the trajs
@@ -1692,18 +1692,45 @@ def sites(site_inputs,
 
     Parameters
     ----------
-    site_inputs : dict or list of dicts
+    site_inputs : dict or path to file, or list thereof
         Site(s) to compute. A site can be either
         a path to a site file in json format or
         directly a site dictionary. A site dictionary
         is something like
 
-        >>> {"name":"site",
-        >>>  "pairs":{"AAresSeq":["GLU30-ARG40",
-        >>>                       "LYS31-W70"]}}
+        >>> {"name": "interesting contacts",
+        >>>  "pairs": {"AAresSeq": ["L394-K270",
+        >>>                         "D381-Q229"]}}
 
         Any site containing a residue that can't be
         found in the topology will be discarded.
+        The list in "pairs" can be specified as:
+         * 'AAresSeq':
+          >>> {"name": "interesting contacts",
+          >>>  "pairs": {"AAresSeq": ["L394-K270",
+          >>>                         "D381-Q229"]}}
+          The 'AAresSeq' definitions are transferable to
+          another system where the same aminoacids are
+          present, regardless of their actual zero-indexing.
+         * 'residx':
+          >>> {"name": "interesting contacts",
+          >>>  "pairs": {"residx":[[353,972],
+          >>>                      [340,956]]}}
+          The 'residx' definitions are only transferable
+          across systems as long as both systems share the same
+          zero-indexing scheme.
+         * 'consensus'
+          >>> {"name": "interesting contacts",
+          >>>  "pairs": {"consensus": ["G.H5.26-6.32x32",
+          >>>                          "G.H5.13-5.68x68"]}}
+          The 'consensus' definitions are transferable to
+          another system, even if the selected aminoacids are
+          different. Please note, in order
+          to use 'consensus' definitions, you need
+          to pass at least one of the `GPCR_UniProt`,
+          `CGN_UniProt` or `KLIFS_string` arguments, else
+          there is no way to know which residues the labels
+          belong to.
         See :obj:`mdciao.sites` for more info on
         the site format.
     trajectories : str, :obj:`mdtraj.Trajectory` or lists thereof
@@ -1758,28 +1785,28 @@ def sites(site_inputs,
         (allows for object re-use when in API mode)
         See :obj:`mdciao.nomenclature` for more info and references.
     KLIFS_string : str or :obj:`mdciao.nomenclature.LabelerKLIFS`, default is None
-            String for kinase KLIFS nomenclature. First, try to locate a local
-            file that directly has the `KLIFS_string` as a name. If that fails, then
-            combine the filename-format expected by :obj:`mdciao.nomenclature.LabelerKLIFS`
-            with `KLIFS_string` to construct a filename and check again.
-            If that doesn't work, then go online to contact the KLIFS database.
-
-            For the online lookup in the KLIFS database, the string
-            has to be formatted "key:value", which ultimately leads to a given KLIFS entry.
-            Acceptable keys and values for `KLIFS_string` are:
-                * "UniProtAC", e.g. "UniProtAC:P31751"
-                * "kinase_ID", e.g. "kinase_ID:2"
-                * "structure_ID", e.g. "structure_ID:1904", e.g. "P31751",
-            Please check the documentation on :obj:`mdciao.nomenclature.LabelerKLIFS`
-            for a more elaborate explanation on when to pick one of these key:value
-            pairs.
-
-            Finally, if `KLIFS_string` is an :obj:`mdciao.nomenclature.LabelerKLIFS`,
-            use this object directly (allows for object re-use when in API mode).
-            See :obj:`mdciao.nomenclature` for more info and references. Alos, please note
-            the difference between UniProt Accession Code
-            and UniProt entry name as explained
-            `here <https://www.uniprot.org/help/difference%5Faccession%5Fentryname>`_ .
+        String for kinase KLIFS nomenclature. First, try to locate a local
+        file that directly has the `KLIFS_string` as a name. If that fails, then
+        combine the filename-format expected by :obj:`mdciao.nomenclature.LabelerKLIFS`
+        with `KLIFS_string` to construct a filename and check again.
+        If that doesn't work, then go online to contact the KLIFS database.
+
+        For the online lookup in the KLIFS database, the string
+        has to be formatted "key:value", which ultimately leads to a given KLIFS entry.
+        Acceptable keys and values for `KLIFS_string` are:
+            * "UniProtAC", e.g. "UniProtAC:P31751"
+            * "kinase_ID", e.g. "kinase_ID:2"
+            * "structure_ID", e.g. "structure_ID:1904", e.g. "P31751",
+        Please check the documentation on :obj:`mdciao.nomenclature.LabelerKLIFS`
+        for a more elaborate explanation on when to pick one of these key:value
+        pairs.
+
+        Finally, if `KLIFS_string` is an :obj:`mdciao.nomenclature.LabelerKLIFS`,
+        use this object directly (allows for object re-use when in API mode).
+        See :obj:`mdciao.nomenclature` for more info and references. Also, please note
+        the difference between UniProt Accession Code
+        and UniProt entry name as explained
+        `here <https://www.uniprot.org/help/difference%5Faccession%5Fentryname>`_ .
     fragments : str, list, None, default is "lig_resSeq+"
         Topology fragments. There exist several input modes:
 
@@ -1918,6 +1945,7 @@ def sites(site_inputs,
     ctc_idxs_small, site_maps = _mdcsites.sites_to_res_pairs(sites, refgeom.top,
                                                              fragments=fragments_as_residue_idxs,
                                                              default_fragment_index=default_fragment_index,
+                                                             consensus_maps=[consensus_maps if len(consensus_maps)>0 else None][0],
                                                              fragment_names=fragment_names)
     if None in ctc_idxs_small:
         print("Some definitions of the 'site_inputs' contain one or more residues not found in the input topology.")
@@ -2186,7 +2214,7 @@ def _res_resolver(res_range, top, fragments, midstring=None, GPCR_UniProt=None,
 
     res_idxs_list = _mdcu.residue_and_atom.rangeexpand_residues2residxs(res_range, fragments, top,
                                                                         pick_this_fragment_by_default=None,
-                                                                        additional_resnaming_dicts=consensus_maps,
+                                                                        additional_resnaming_dicts=[consensus_maps if len(consensus_maps)>0 else None][0],
                                                                         **rangeexpand_residues2residxs_kwargs,
                                                                         )
 
 
@@ -1953,9 +1953,13 @@ def label_flex(self, AA_format="short", split_label=True, defrag=None, fmt1="%-1
         Parameters
         ----------
         AA_format : str, default is "short"
-            Amino-acid format for the label, can
-            be "short" (A35@4.50), "long" (ALA35@4.50),
-            or "just_consensus" (4.50)
+            Amino-acid format for the label, can be
+             * "short": A35@4.50
+             * "long": ALA35@4.50
+             * "just_consensus": 4.50
+             * "try_consensus":  4.50 if consensus labeling is present,
+               else default to "short"
+
         split_label : bool, default is True
             Split the labels so that stacked contact labels
             become easier-to-read in plain ascii formats
@@ -1966,10 +1970,10 @@ def label_flex(self, AA_format="short", split_label=True, defrag=None, fmt1="%-1
             contact label. Default is to leave
             them as is, e.g. would be "@"
         fmt1 : str, default is "%-15s"
-            Specify how the labels of res1 should formatted.
+            Specify how the labels of res1 should be formatted.
             Only has effect if `split_label` is True
         fmt2 : str, default is "%-15s"
-            Specify how the labels of res2 should formatted.
+            Specify how the labels of res2 should be formatted.
             Only has effect if `split_label` is True
         Returns
         -------
@@ -1980,13 +1984,25 @@ def label_flex(self, AA_format="short", split_label=True, defrag=None, fmt1="%-1
             label = self.labels.w_fragments_short_AA
         elif AA_format== 'long':
             label = self.labels.w_fragments
-        elif AA_format== 'just_consensus':
+        elif AA_format.endswith('_consensus'):
             #TODO where do we put this assertion?
             if None in self._attribute_residues.consensus_labels:
-                raise ValueError("Residues %s don't have both consensus labels:%s" % (
-                    self._attribute_residues.names_short,
-                    self._attribute_residues.consensus_labels))
-            label = self.labels.just_consensus
+                if AA_format.startswith("just_"):
+                    raise ValueError("Residues %s don't have both consensus labels:%s. \n Try setting `AA_format='try_consensus'`" % (
+                        self._attribute_residues.names_short,
+                        self._attribute_residues.consensus_labels))
+                elif AA_format.startswith("try_"):
+                    cands = _mdcu.str_and_dict.splitlabel(self.labels.w_fragments_short_AA)
+                    label=[]
+                    for ii, lab in enumerate(self._attribute_residues.consensus_labels):
+                        if lab is None:
+                            label.append(cands[ii])
+                        else:
+                            label.append(lab)
+                    label="-".join(label)
+            else:
+                label = self.labels.just_consensus
+
         else:
             raise ValueError(AA_format)
         if defrag is not None:
 
@@ -101,6 +101,9 @@ def __init__(self):
         self.tip_json = _path.join(self.json_path,"tip.json")
         self.tip_dat= _path.join(self.json_path,"tip.dat")
         self.tip_residx_dat= _path.join(self.json_path,"tip_residx.dat")
+        self.tip_consensus_dat = _path.join(self.json_path,"tip_consensus.dat")
+        self.tip_consensus_json = _path.join(self.json_path,"tip_consensus.json")
+
 
         #zip
         self.zipfile_two_empties = _path.join(self.example_path,"two_empty_files.zip")
 
@@ -821,7 +821,7 @@ def check_if_fragment_clashes(sub_frag, fragname, fragments, top,
     # Get the fragment idxs of all residues in this fragment
     ifrags = [_mdcu.lists.in_what_fragment(idx, fragments) for idx in sub_frag]
 
-    frag_cands = [ifrag for ifrag in _pandas_unique(ifrags) if ifrag is not None]
+    frag_cands = [ifrag for ifrag in _pandas_unique(_np.array(ifrags)) if ifrag is not None]
     if prompt:
         was_subfragment = len(frag_cands) <= 1
         if not was_subfragment:
 
@@ -2305,7 +2305,7 @@ def _map2defs(cons_list, splitchar="."):
 def _sort_consensus_labels(subset, sorted_superset,
                            append_diffset=True):
     r"""
-    Sort consensus labels (GPCR or CGN)
+    Sort consensus labels (GPCR, CGN, KLIFS)
 
     Parameters
     ----------
 
@@ -38,6 +38,13 @@
 
 import mdciao.utils as _mdcu
 
+from mdciao.nomenclature.nomenclature import _sort_all_consensus_labels
+# The above line introduces a dependency of 'plots' on 'nomenclature', which were
+# uncoupled so far. The alternative would be to put '_sort_all_consensus_labels'
+# into 'utils.str_and_dict' (since it's essentially string operations).
+# However, as plotting methods become increasingly nomenclature-aware, such a
+# plots -> nomenclature dependency will likely be needed in the future anyway.
+
 from os import path as _path
 
 from collections import defaultdict as _defdict
@@ -48,7 +55,7 @@
 
 from pandas import DataFrame as _DataFrame
 
-_metric_types_for_sorting = frozenset(["mean", "std", "numeric", "residue", "keep"])
+_schemes_for_sorting = frozenset(["mean", "std", "numeric", "residue", "keep", "consensus"])
 
 def plot_w_smoothing_auto(y, ax=None, label=None, color=None, x=None, background=True, n_smooth_hw=0, ls="-"):
     r"""
@@ -290,6 +297,7 @@ def _pop_keys_by_scheme(sort_by, freqs_by_sys_by_ctc, mean_std_by_ctc,
     drop_below["numeric"] = drop_below["mean"]
     drop_below["residue"] = drop_below["mean"]
     drop_below["list"]    = drop_below["mean"]
+    drop_below["consensus"] = drop_below["mean"]
 
     drop_above = lambda ctc: all([idict[ctc] >= identity_cutoff for idict in freqs_by_sys_by_ctc.values()]) \
                              and remove_identities
@@ -344,7 +352,7 @@ def _sorting_schemes(freqs_by_sys_by_ctc, sort_by='mean',
         assert len(all_ctc_keys) == len(list(freqs_by_sys_by_ctc[sk].keys())), ValueError("This is not a unified dictionary")
 
     # 0. Compute means and stds for everybody
-    dict_for_sorting = {key: {key : None for key in all_ctc_keys} for key in list(_metric_types_for_sorting)+["list"]}
+    dict_for_sorting = {key: {key : None for key in all_ctc_keys} for key in list(_schemes_for_sorting) + ["list"]}
     for key in all_ctc_keys:
         dict_for_sorting["std"][key] = _np.std([idict[key] for idict in freqs_by_sys_by_ctc.values()])
         dict_for_sorting["mean"][key] = _np.mean([idict[key] for idict in freqs_by_sys_by_ctc.values()])
@@ -358,7 +366,7 @@ def _sorting_schemes(freqs_by_sys_by_ctc, sort_by='mean',
         kept_keys = [key for key in sort_by if key in all_ctc_keys] #setops don't conserve order
         excluded_ctc_keys = [key for key in all_ctc_keys if key not in kept_keys] #setops don't conserve order
         sort_by = "list"
-    elif sort_by in _metric_types_for_sorting:
+    elif sort_by in _schemes_for_sorting:
 
         # Then sort, in case sort_by wasn't a list but an actual scheme (has its own method)
         kept_keys = _sorter_by_key_or_val(sort_by, dict_for_sorting[sort_by])
@@ -367,7 +375,7 @@ def _sorting_schemes(freqs_by_sys_by_ctc, sort_by='mean',
 
         excluded_ctc_keys = []
     else:
-        raise ValueError(f"Argument 'sort_by' has to be one of {list(_metric_types_for_sorting)}, but not '{sort_by}'")
+        raise ValueError(f"Argument 'sort_by' has to be one of {list(_schemes_for_sorting)}, but not '{sort_by}'")
 
     freqs_by_sys_by_ctc = {skey : {key : sval[key] for key in kept_keys} for skey, sval in freqs_by_sys_by_ctc.items()}
 
@@ -1757,6 +1765,8 @@ def _sorter_by_key_or_val(sort_by, indict):
         # In[5]: natsorted(["0-20", "0-10", "ALA30-GLU50", "ALA30-GLU40", "ALA", "GLU5-ALA20"])
         # Out[5]: ['0-10', '0-20', 'ALA', 'ALA30-GLU40', 'ALA30-GLU50', 'GLU5-ALA20']
         # -> we would want ['0-10', '0-20', 'GLU5-ALA20', 'ALA30-GLU40', 'ALA30-GLU50', 'ALA']
+    elif sort_by == "consensus":
+        ordered_keys = _sort_all_consensus_labels(all_ctc_keys)
     elif sort_by in ["mean", "std"]:
         ordered_keys = list(_mdcu.str_and_dict.sort_dict_by_asc_values(indict).keys())
     elif sort_by == "keep":