feat: Add random state feature. #150

Open
wants to merge 3 commits into base: john-development
21 changes: 1 addition & 20 deletions src/diffpy/snmf/main.py
@@ -7,27 +7,8 @@
Y0 = np.loadtxt("input/W0.txt", dtype=float)
N, M = MM.shape

-# Convert to DataFrames for display
-# df_X = pd.DataFrame(X, columns=[f"Comp_{i+1}" for i in range(X.shape[1])])
-# df_Y = pd.DataFrame(Y, columns=[f"Sample_{i+1}" for i in range(Y.shape[1])])
-# df_MM = pd.DataFrame(MM, columns=[f"Sample_{i+1}" for i in range(MM.shape[1])])
-# df_Y0 = pd.DataFrame(Y0, columns=[f"Sample_{i+1}" for i in range(Y0.shape[1])])
-
-# Print the matrices
-"""
-print("Feature Matrix (X):\n", df_X, "\n")
-print("Coefficient Matrix (Y):\n", df_Y, "\n")
-print("Data Matrix (MM):\n", df_MM, "\n")
-print("Initial Guess (Y0):\n", df_Y0, "\n")
-"""


-my_model = snmf_class.SNMFOptimizer(MM=MM, Y0=Y0, X0=X0, A=A0, components=2)
+my_model = snmf_class.SNMFOptimizer(MM=MM, Y0=Y0, X0=X0, A=A0, n_components=2)
print("Done")
-# print(f"My final guess for X: {my_model.X}")
-# print(f"My final guess for Y: {my_model.Y}")
-# print(f"Compare to true X: {X_norm}")
-# print(f"Compare to true Y: {Y_norm}")
np.savetxt("my_norm_X.txt", my_model.X, fmt="%.6g", delimiter=" ")
np.savetxt("my_norm_Y.txt", my_model.Y, fmt="%.6g", delimiter=" ")
np.savetxt("my_norm_A.txt", my_model.A, fmt="%.6g", delimiter=" ")
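The script saves each result matrix with `np.savetxt` using `fmt="%.6g"`. As a quick sanity check on that choice, here is a standalone sketch (the matrix and filename are made up for illustration) showing that the round trip through `%.6g` preserves values to roughly six significant digits:

```python
import numpy as np

# Hypothetical small matrix standing in for a result like my_model.X
X = np.array([[0.123456789, 1.0], [2.5e-7, 3.0]])

# Same call pattern as main.py: 6 significant digits, space-delimited
np.savetxt("demo_X.txt", X, fmt="%.6g", delimiter=" ")

# Values survive the round trip to ~6 significant digits
X_back = np.loadtxt("demo_X.txt", dtype=float)
print(np.allclose(X, X_back, rtol=1e-5))  # → True
```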
90 changes: 80 additions & 10 deletions src/diffpy/snmf/snmf_class.py
@@ -4,8 +4,79 @@


class SNMFOptimizer:
def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500, tol=5e-7, components=None):
Contributor: we need a docstring here and in the init. Please see the scikit-package FAQ about how to write these. Also, look at Yucong's code or diffpy.utils?

Author: Added one here. The package init dates back to the old codebase, but as soon as that is updated it will get a docstring as well.

print("Initializing SNMF Optimizer")
def __init__(
self,
MM,
Y0=None,
X0=None,
A=None,
Contributor: more descriptive name?

Author: There are many different standards for what to name these matrices. Zero agreement between sources that use NMF. I'm inclined to eventually use what sklearn.decomposition.non_negative_factorization uses, which would mean MM->X, X->W, Y->H. But I'd like to leave this as is for the moment until there's a consensus about what would be the most clear or standard. If people will be finding this tool from the sNMF paper, there's also an argument for using the X, Y, and A names because that was used there.
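For reference, the sklearn convention the author mentions factors a data matrix X of shape (n_samples, n_features) as X ≈ W @ H, which maps onto this PR's names as MM ≈ X @ Y. A minimal numpy sketch of the shape convention (all names and sizes here are illustrative, not from the PR):

```python
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features, n_components = 6, 4, 2

# sklearn convention: data X factors as X ≈ W @ H
W = rng.random((n_samples, n_components))   # "X" in this PR's naming
H = rng.random((n_components, n_features))  # "Y" in this PR's naming
X = W @ H                                   # "MM" in this PR's naming

print(X.shape)  # (6, 4)
```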

rho=1e12,
eta=610,
max_iter=500,
tol=5e-7,
n_components=None,
random_state=None,
):
"""Run sNMF based on an ndarray, parameters, and either a number
of components or a set of initial guess matrices.

Currently, instantiating the SNMFOptimizer class runs all the analysis
immediately. The results can then be accessed as instance attributes
of the class (X, Y, and A). Eventually, this will be changed so that
__init__ only prepares for the optimization, which can then be done
using fit_transform.

Parameters
----------
MM: ndarray
A numpy array containing the data to be decomposed. Rows correspond
to different samples/angles, while columns correspond to different
conditions with different stretching. Currently, there is no option
to treat the first column (commonly containing 2theta angles, sample
index, etc) differently, so if present it must be stripped in advance.
Y0: ndarray
A numpy array containing initial guesses for the component weights
at each stretching condition, with number of rows equal to the assumed
number of components and number of columns equal to the number of
conditions (same number of columns as MM). Must be provided if
n_components is not provided. Will override n_components if both are
provided.
X0: ndarray
A numpy array containing initial guesses for the intensities of each
component per row/sample/angle. Has rows equal to the rows of MM and
columns equal to n_components or the number of rows of Y0.
A: ndarray
A numpy array containing initial guesses for the stretching factor for
each component, at each condition. Has number of rows equal to n_components
or the number of rows of Y0, and columns equal to the number of conditions
(columns of MM).
rho: float
A stretching factor that influences the decomposition. Zero corresponds to
no stretching present. Relatively insensitive and typically adjusted in
powers of 10.
eta: float
A sparsity factor that influences the decomposition. Should be set to zero
for non-sparse data such as PDF. Can be used to improve results for sparse
data such as XRD but, due to instability, should be used only after first
selecting the best value for rho.
max_iter: int
The maximum number of times to update each of A, X, and Y before stopping
the optimization.
tol: float
The minimum fractional improvement in the objective function required to
continue; the optimization terminates once improvement falls below this
value. Note that a minimum of 20 updates are run before this parameter
is checked.
n_components: int
The number of components to attempt to extract from MM. Note that this will
be overridden by Y0 if that is provided, but must be provided if no Y0 is
provided.
random_state: int
Used to set a reproducible seed for the initial matrices used in the
optimization. Due to the non-convex nature of the problem, results may vary
even with the same initial guesses, so this does not make the program
deterministic.
"""
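The random_state description above says seeding makes the initial matrices reproducible. This can be checked in isolation with plain numpy, mirroring the class's Y0 initialization (the helper name and sizes below are hypothetical, not part of the PR):

```python
import numpy as np

def make_Y0(random_state, K=2, M=5):
    # Mirrors the class's Y0 initialization: Beta(2.5, 1.5) draws
    rng = np.random.default_rng(random_state)
    return rng.beta(a=2.5, b=1.5, size=(K, M))

Y0_a = make_Y0(random_state=42)
Y0_b = make_Y0(random_state=42)
print(np.array_equal(Y0_a, Y0_b))  # → True: same seed, identical initial guess
```

Note this only pins down the starting point; as the docstring says, the non-convex optimization itself may still vary between runs.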

self.MM = MM
Contributor: more descriptive name?

Author: Changed to n_components, which is what sklearn.decomposition.NMF uses.

self.X0 = X0
self.Y0 = Y0
@@ -15,23 +86,22 @@ def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500
# Capture matrix dimensions
self.N, self.M = MM.shape
self.num_updates = 0
self.rng = np.random.default_rng(random_state)
Contributor: can we have a more descriptive variable name? Is this a range? What is the range?


if Y0 is None:
-if components is None:
-raise ValueError("Must provide either Y0 or a number of components.")
+if n_components is None:
+raise ValueError("Must provide either Y0 or n_components.")
else:
-self.K = components
-self.Y0 = np.random.beta(a=2.5, b=1.5, size=(self.K, self.M)) # This is untested
+self.K = n_components
+self.Y0 = self.rng.beta(a=2.5, b=1.5, size=(self.K, self.M))
else:
self.K = Y0.shape[0]

# Initialize A, X0 if not provided
if self.A is None:
-self.A = np.ones((self.K, self.M)) + np.random.randn(self.K, self.M) * 1e-3 # Small perturbation
+self.A = np.ones((self.K, self.M)) + self.rng.normal(0, 1e-3, size=(self.K, self.M))
Contributor: K and M are probably good names if the matrix decomposition equation is in the docstring, so they get defined there.

if self.X0 is None:
-self.X0 = np.random.rand(self.N, self.K) # Ensures values in [0,1]
+self.X0 = self.rng.random((self.N, self.K))

# Initialize solution matrices to be iterated on
self.X = np.maximum(0, self.X0)
self.Y = np.maximum(0, self.Y0)
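On the reviewer's question above: rng here is a numpy Generator created by np.random.default_rng, not a range. A standalone sketch of the three initializations in this hunk, pulled out into a function for clarity (the function name and the sizes passed to it are hypothetical):

```python
import numpy as np

def init_matrices(N, M, K, random_state=None):
    """Mirror of the initialization above: Y0 ~ Beta(2.5, 1.5),
    A ~ ones plus small Gaussian noise, X0 ~ uniform on [0, 1)."""
    rng = np.random.default_rng(random_state)  # a Generator, not a range
    Y0 = rng.beta(a=2.5, b=1.5, size=(K, M))
    A = np.ones((K, M)) + rng.normal(0, 1e-3, size=(K, M))  # small perturbation around 1
    X0 = rng.random((N, K))
    # Clip to enforce non-negativity, as the class does for X and Y
    return np.maximum(0, X0), np.maximum(0, Y0), A

X, Y, A = init_matrices(N=10, M=4, K=2, random_state=0)
print(X.shape, Y.shape, A.shape)  # (10, 2) (2, 4) (2, 4)
```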
