Skip to content
Draft
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 30 additions & 22 deletions diffpy/snmf/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from diffpy.utils.parsers.loaddata import loadData


def initialize_variables(data_input, component_amount, data_type, sparsity=1, smoothness=1e18):
def initialize_variables(data_input, number_of_components, data_type, sparsity=1, smoothness=1e18):
"""Determines the variables and initial values used in the SNMF algorithm.

Parameters
Expand All @@ -13,7 +13,7 @@ def initialize_variables(data_input, component_amount, data_type, sparsity=1, sm
The observed or simulated PDF or XRD data provided by the user. Has dimensions R x N where R is the signal length
and N is the number of PDF/XRD signals.

component_amount: int
number_of_components: int
The number of component signals the user would like to decompose 'data_input' into.

data_type: str
Expand All @@ -40,27 +40,30 @@ def initialize_variables(data_input, component_amount, data_type, sparsity=1, sm

"""
signal_length = data_input.shape[0]
moment_amount = data_input.shape[1]
number_of_moments = data_input.shape[1]

component_matrix_guess = np.random.rand(signal_length, component_amount)
weight_matrix_guess = np.random.rand(component_amount, moment_amount)
stretching_matrix_guess = np.ones(component_amount, moment_amount) + np.random.randn(component_amount,
moment_amount) * 1e-3
component_matrix_guess = np.random.rand(signal_length, number_of_components)
weight_matrix_guess = np.random.rand(number_of_components, number_of_moments)
stretching_matrix_guess = np.ones((number_of_components, number_of_moments)) + np.random.randn(number_of_components,
number_of_moments) * 1e-3

diagonals = [np.ones(moment_amount - 2), -2 * np.ones(moment_amount - 2), np.ones(moment_amount - 2)]
smoothness_term = .25 * scipy.sparse.diags(diagonals, [0, 1, 2], shape=(moment_amount - 2, moment_amount))
diagonals = [np.ones(number_of_moments - 2), -2 * np.ones(number_of_moments - 2), np.ones(number_of_moments - 2)]
smoothness_term = .25 * scipy.sparse.diags(diagonals, [0, 1, 2], shape=(number_of_moments - 2, number_of_moments))

hessian_helper_matrix = scipy.sparse.block_diag([smoothness_term.T @ smoothness_term] * component_amount)
sequence = np.arange(moment_amount * component_amount).reshape(component_amount, moment_amount).T.flatten()
hessian_helper_matrix = scipy.sparse.block_diag([smoothness_term.T @ smoothness_term] * number_of_components)
sequence = np.arange(number_of_moments * number_of_components).reshape(number_of_components,
number_of_moments).T.flatten()

hessian_helper_matrix = hessian_helper_matrix.tocsr()
hessian_helper_matrix = hessian_helper_matrix[sequence, :][:, sequence]

return {
"signal_length": signal_length,
"moment_amount": moment_amount,
"component_matrix_guess": component_matrix_guess,
"weight_matrix_guess": weight_matrix_guess,
"stretching_matrix_guess": stretching_matrix_guess,
"component_amount": component_amount,
"number_of_moments": number_of_moments,
"component_matrix": component_matrix_guess,
"weights_matrix": weight_matrix_guess,
"stretching_factor_matrix": stretching_matrix_guess,
"number_of_components": number_of_components,
"data_type": data_type,
"smoothness": smoothness,
"sparsity": sparsity,
Expand Down Expand Up @@ -91,18 +94,17 @@ def load_input_signals(file_path=None):

"""

if file_path is None:
directory_path = Path.cwd()
else:
directory_path = Path(file_path)
directory_path = Path(file_path)

values_list = []
grid_list = []
current_grid = []
file_extension = ""
for item in directory_path.iterdir():
if item.is_file():
file_extension = Path(item).suffix
data = loadData(item.resolve())
if current_grid and current_grid != data[:, 0]:
if len(current_grid) != 0 and (current_grid != data[:, 0]).any():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a comment line would help here with the intent. I.e., why do we ignore and not handle if the data are on a different grid. If I remember correctly t here was a physics reason (mathematically it should be straightforward). Just recording for future people.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a comment line to explain this

print(f"{item.name} was ignored as it is not on a compatible grid.")
continue
else:
Expand All @@ -113,4 +115,10 @@ def load_input_signals(file_path=None):
grid_array = np.column_stack(grid_list)
grid_vector = np.unique(grid_array, axis=1)
values_array = np.column_stack(values_list)
return grid_vector, values_array
if file_extension in {'.gr', '.chi'}:
data_type = 'pdf'
elif file_extension in {'iq', '.xy', '.xye', '.xrd'}:
data_type = 'xrd'
else:
data_type = None
return grid_vector, values_array, data_type
62 changes: 62 additions & 0 deletions diffpy/snmf/stretchednmfapp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import numpy as np
import argparse
from pathlib import Path

from diffpy.snmf.io import load_input_signals, initialize_variables
from diffpy.snmf.subroutines import update_weights_matrix, get_residual_matrix, objective_function


def create_parser():
parser = argparse.ArgumentParser(
prog="stretched_nmf",
description="Stretched Nonnegative Matrix Factorization"
)

parser.add_argument('-i', '--input-directory', type=str, default=None,
help="Directory containing experimental data. Defaults to current working directory.")
parser.add_argument('-o', '--output-directory', type=str,
help="The directory where the results will be written. Defaults to '<input_directory>/snmf_results'.")
parser.add_argument('-t', '--data-type', type=str, choices=['powder_diffraction', 'pdf'],
help="The type of the experimental data.")
parser.add_argument('components', type=int,
help="The number of component signals for the NMF decomposition. Must be an integer greater than 0.")
parser.add_argument('-v', '--version', action='version', help='Print the software version number')
args = parser.parse_args()
return args


def main():
args = create_parser()
if args.input_directory is None:
args.input_directory = Path.cwd()

grid, data_input, data_type = load_input_signals(args.input_directory)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is maybe clearer to put the default logic here that turns input_directory from None to cwd. Or else, make it an optional argument in load_input_data.

Again, it is about code readability.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do this.

if args.data_type:
data_type = args.data_type
variables = initialize_variables(data_input, args.components, data_type)

if variables["data_type"] == 'pdf':
lifted_data = data_input - np.ndarray.min(data_input[:]) # Will later use to function in subroutines

maxiter = 300

for out_iter in range(maxiter):
variables["weights_matrix"] = update_weights_matrix(variables["number_of_components"],
variables["signal_length"],
variables["stretching_factor_matrix"],
variables["component_matrix"], lifted_data,
variables["number_of_moments"], variables["weights_matrix"],
None)

residual_matrix = get_residual_matrix(variables["component_matrix"], variables["weights_matrix"],
variables["stretching_factor_matrix"], lifted_data,
variables["number_of_moments"], variables["number_of_components"],
variables["signal_length"])
fun = objective_function(residual_matrix, variables["stretching_factor_matrix"], variables["smoothness"],
variables["smoothness_term"], variables["component_matrix"], variables["sparsity"])

return 1


if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

# Installation script for diffpy.pdfmorph
# Installation script for diffpy.snmf

"""diffpy.snmf - a package implementing the stretched NMF algorithm.

Expand Down Expand Up @@ -28,7 +28,7 @@
entry_points={
# define console_scripts here, see setuptools docs for details.
'console_scripts': [
'pdfmorph = diffpy.pdfmorph.pdfmorphapp:main',
'stretched_nmf = diffpy.snmf.stretchednmfapp:main',
],
},
test_suite='tests',
Expand Down