Skip to content

Commit

Permalink
Allow opening .npz PAE files produced by Boltz-1. Ticket #16792
Browse files Browse the repository at this point in the history
  • Loading branch information
tomgoddard committed Feb 6, 2025
1 parent c038063 commit 3002f7d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/bundles/alphafold/bundle_info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

<Providers manager="data formats">
<Provider name="AlphaFold PAE" synopsis="AlphaFold predicted aligned error" category="Structure analysis"
suffixes=".json" default_for=".json" nicknames="pae"
suffixes=".json,.npy,.npz,.pkl" default_for=".json" nicknames="pae"
reference_url="https://en.wikipedia.org/wiki/Predicted_Aligned_Error" />
</Providers>

Expand Down
27 changes: 19 additions & 8 deletions src/bundles/alphafold/src/pae.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self, session, tool_name):
self._structure_menu = m
layout.addWidget(m.frame)

self._source_file = 'file (.json or .npy or .pkl)'
self._source_file = 'file (.json or .npy or .npz or .pkl)'
self._source_database = f'{self.method} database ({self.database_key} id)'
from chimerax.ui.widgets import EntriesRow
ft = EntriesRow(parent, 'Predicted aligned error (PAE) from',
Expand Down Expand Up @@ -167,7 +167,7 @@ def _choose_pae_file(self):
from Qt.QtWidgets import QFileDialog
path, ftype = QFileDialog.getOpenFileName(parent, caption = 'Predicted aligned error',
directory = dir,
filter = 'PAE file (*.json *.npy *.pkl)')
filter = 'PAE file (*.json *.npy *.npz *.pkl)')
if path:
self._pae_file.setText(path)
self._open_pae()
Expand Down Expand Up @@ -202,8 +202,10 @@ def _open_pae_from_file(self, structure):
if not isfile(path):
raise UserError(f'File "{path}" does not exist.')

if not path.endswith('.json') and not path.endswith('.npy') and not path.endswith('.pkl'):
raise UserError(f'PAE file suffix must be ".json" or ".npy" or ".pkl".')
suffixes = ('.json', '.npy', '.npz', '.pkl')
if len([path for suffix in suffixes if path.endswith(suffix)]) == 0:
suf = ' or '.join(f'"{suffix}"' for suffix in suffixes)
raise UserError(f'PAE file suffix must be {suf}.')

from chimerax.core.commands import run, quote_if_necessary
cmd = '%s pae #%s file %s' % (self.command, structure.id_string, quote_if_necessary(path))
Expand Down Expand Up @@ -298,6 +300,11 @@ def _show_help(self):
# pae.model_idx_2.rank_0.npy
# not scores.model_idx_2.rank_0.json which contains summary scores
#
# Boltz-1 local run
#
# nipah_zmr_model_0.cif
# pae_nipah_zmr_model_0.npz
#
# Finding json/pkl with matching prefix works except for full alphafold which
# wants matching suffix.
#
Expand All @@ -311,7 +318,7 @@ def _matching_pae_file(structure_path):
dfiles = listdir(dir)
pkl_files = [f for f in dfiles if f.endswith('.pkl')]
json_files = [f for f in dfiles if f.endswith('.json') and not f.startswith('confidence_')]
npy_files = [f for f in dfiles if f.endswith('.npy')]
npy_files = [f for f in dfiles if f.endswith('.npy') or f.endswith('.npz')]

if len(pkl_files) == 0 and len(json_files) == 0 and len(npy_files) == 0:
return None
Expand All @@ -326,9 +333,11 @@ def _matching_pae_file(structure_path):
min_length = min(6, len(splitext(filename)[0]))
mfile = None

# Check for precise name match of Chai-1 numpy files
# Check for precise name match of Chai-1 or Boltz-1 numpy files
if len(npy_files) > 0:
mfile = _longest_matching_suffix(filename, npy_files, min_length = min_length)
if mfile is None:
mfile = _longest_matching_suffix('pae_' + filename, npy_files, min_length = min_length) # Boltz-1
if mfile is None:
mfile = _longest_matching_prefix(filename, npy_files, min_length = min_length)

Expand Down Expand Up @@ -1212,13 +1221,13 @@ def _include_deleted_residues(res):
def read_pae_matrix(path):
if path.endswith('.json'):
return read_json_pae_matrix(path)
elif path.endswith('.npy'):
elif path.endswith('.npy') or path.endswith('.npz'):
return read_numpy_pae_matrix(path)
elif path.endswith('.pkl'):
return read_pickle_pae_matrix(path)
else:
from chimerax.core.errors import UserError
raise UserError(f'AlphaFold predicted aligned error (PAE) files must be in JSON (*.json) or numpy (*.npy) or pickle (*.pkl) format, {path} unrecognized format')
raise UserError(f'AlphaFold predicted aligned error (PAE) files must be in JSON (*.json) or numpy (*.npy, *.npz) or pickle (*.pkl) format, {path} unrecognized format')

# -----------------------------------------------------------------------------
#
Expand Down Expand Up @@ -1273,6 +1282,8 @@ def read_json_pae_matrix(path):
def read_numpy_pae_matrix(path):
    '''
    Read a predicted aligned error matrix from a numpy file.

    A ".npy" file is returned exactly as numpy.load() delivers it.
    A ".npz" archive (the format this commit adds for Boltz-1 output)
    is expected to hold the matrix under the key "pae"; a KeyError
    is raised if the archive has no such entry.
    '''
    import numpy
    if path.endswith('.npz'):
        # numpy.load() returns an NpzFile for archives, which keeps the
        # underlying zip file open.  Use it as a context manager so the
        # file handle is released as soon as the array has been read.
        with numpy.load(path) as npz:
            return npz['pae']
    return numpy.load(path)

# -----------------------------------------------------------------------------
Expand Down

0 comments on commit 3002f7d

Please sign in to comment.