FEAT: Yeehaw finally figured out 2D penalized splines

Someone should give Paul Eilers the Nobel prize, dude is the GOAT. On a more serious note, the internals of the PSpline2D class will most likely change, but the external calls within the baseline algorithms should remain the same. Implemented the 2D versions of irsqr, pspline_asls, pspline_airpls, pspline_arpls, pspline_iarpls, and pspline_psalsa.
derb12 · derb12 · Feb 13, 2024 · Apr 8, 2023 · Apr 17, 2023 · Apr 22, 2023
commit 8de57b023fd3ffaf7046c61ba11622fbc24f503f
diff --git a/pybaselines/two_d/_algorithm_setup.py b/pybaselines/two_d/_algorithm_setup.py
@@ -8,14 +8,14 @@
 
 from contextlib import contextmanager
 from functools import partial, wraps
+import warnings
 
 import numpy as np
 from scipy.ndimage import grey_opening
 
-from ._validation import (
-    _check_array, _check_half_window, _check_optional_array, _check_sized_array, _yx_arrays
-)
-from ..utils import _inverted_sort, pad_edges, relative_difference
+from ..utils import ParameterWarning, _inverted_sort, pad_edges, relative_difference
+from ._spline_utils import PSpline2D
+from ._validation import _check_array, _check_half_window, _check_optional_array, _check_scalar
 
 
 class _Algorithm2D:
@@ -376,6 +376,89 @@ def _setup_polynomial(self, y, weights=None, poly_order=2, calc_vander=False,
 
         return y, weight_array, pseudo_inverse
 
+    def _setup_spline(self, y, weights=None, spline_degree=3, num_knots=10,
+                      penalized=True, diff_order=3, lam=1, make_basis=True, allow_lower=True,
+                      reverse_diags=False, copy_weights=False):
+        """
+        Sets the starting parameters for doing spline fitting.
+
+        Parameters
+        ----------
+        y : numpy.ndarray, shape (N,)
+            The y-values of the measured data, already converted to a numpy
+            array by :meth:`._register`.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then will be an array with
+            size equal to N and all values set to 1.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        num_knots : int, optional
+            The number of interior knots for the splines. Default is 10.
+        penalized : bool, optional
+            Whether the basis matrix should be for a penalized spline or a regular
+            B-spline. Default is True, which creates the basis for a penalized spline.
+        diff_order : int, optional
+            The integer differential order for the spline penalty; must be greater than 0.
+            Default is 3. Only used if `penalized` is True.
+        lam : float, optional
+            The smoothing parameter, lambda. Typical values are between 10 and
+            1e8, but it strongly depends on the number of knots and the difference order.
+            Default is 1.
+        make_basis : bool, optional
+            If True (default), will create the matrix containing the spline basis functions.
+        allow_lower : boolean, optional
+            If True (default), will include only the lower non-zero diagonals of
+            the squared difference matrix. If False, will include all non-zero diagonals.
+        reverse_diags : boolean, optional
+            If True, will reverse the order of the diagonals of the penalty matrix.
+            Default is False.
+        copy_weights : boolean, optional
+            If True, will copy the array of input weights. Only needed if the
+            algorithm changes the weights in-place. Default is False.
+
+        Returns
+        -------
+        y : numpy.ndarray, shape (N,)
+            The y-values of the measured data, converted to a numpy array.
+        weight_array : numpy.ndarray, shape (N,)
+            The weight array for fitting the spline to the data.
+
+        Warns
+        -----
+        ParameterWarning
+            Raised if `diff_order` is greater than 4.
+
+        Notes
+        -----
+        `degree` is used instead of `order` like for polynomials since the order of a spline
+        is defined by convention as ``degree + 1``.
+
+        """
+        weight_array = _check_optional_array(
+            y.shape, weights, copy_input=copy_weights, check_finite=self._check_finite, ensure_1d=False  # TODO change y.shape to self._len or self._shape
+        )
+        weight_array = weight_array.ravel()
+        # TODO
+        #if self._sort_order is not None and weights is not None:
+        #    weight_array = weight_array[self._sort_order]
+        diff_order = _check_scalar(diff_order, 2, True)[0]
+        if make_basis:
+            if (diff_order > 4).any():
+                warnings.warn(
+                    ('differential orders greater than 4 can have numerical issues;'
+                     ' consider using a differential order of 2 or 3 instead'),
+                    ParameterWarning, stacklevel=2
+                )
+
+            if self.pspline is None or not self.pspline.same_basis(num_knots, spline_degree):
+                self.pspline = PSpline2D(
+                    self.x, self.z, num_knots, spline_degree, self._check_finite, lam, diff_order
+                )
+            else:
+                self.pspline.reset_penalty_diagonals(lam, diff_order)
+
+        return y.ravel(), weight_array
+
     def _setup_morphology(self, y, half_window=None, **window_kwargs):
         """
         Sets the starting parameters for morphology-based methods.

diff --git a/pybaselines/two_d/_spline_utils.py b/pybaselines/two_d/_spline_utils.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+"""Helper functions for using splines.
+
+Created on April 25, 2023
+@author: Donald Erb
+
+"""
+
+import numpy as np
+from scipy import sparse
+from scipy.sparse.linalg import spsolve
+
+from .._banded_utils import difference_matrix
+from .._spline_utils import _spline_basis, _spline_knots
+from ._validation import _check_array, _check_lam, _check_scalar
+
+
+class PSpline2D:
+    """
+    A Penalized Spline, which penalizes the difference of the spline coefficients.
+
+    Penalized splines (P-Splines) are solved with the following equation
+    ``(B.T @ W @ B + P) c = B.T @ W @ y`` where `c` is the spline coefficients, `B` is the
+    spline basis, the weights are the diagonal of `W`, the penalty is `P`, and `y` is the
+    fit data. The penalty `P` is usually in the form ``lam * D.T @ D``, where `lam` is a
+    penalty factor and `D` is the matrix version of the finite difference operator.
+
+    Attributes
+    ----------
+    basis : scipy.sparse.csr.csr_matrix, shape (N, M)
+        The spline basis. Has a shape of (`N,` `M`), where `N` is the number of points
+        in `x`, and `M` is the number of basis functions (equal to ``K - spline_degree - 1``
+        or equivalently ``num_knots + spline_degree - 1``).
+    coef : None or numpy.ndarray, shape (M,)
+        The spline coefficients. Is None if :meth:`.solve_pspline` has not been called
+        at least once.
+    knots : numpy.ndarray, shape (K,)
+        The knots for the spline. Has a shape of `K`, which is equal to
+        ``num_knots + 2 * spline_degree``.
+    num_knots : int
+        The number of internal knots (including the endpoints). The total number of knots
+        for the spline, `K`, is equal to ``num_knots + 2 * spline_degree``.
+    spline_degree : int
+        The degree of the spline (eg. a cubic spline would have a `spline_degree` of 3).
+    x : numpy.ndarray, shape (N,)
+        The x-values for the spline.
+
+    References
+    ----------
+    Eilers, P., et al. Fast and compact smoothing on large multidimensional grids. Computational
+    Statistics and Data Analysis, 2006, 50(1), 61-76.
+
+    """
+
+    def __init__(self, x, z, num_knots=100, spline_degree=3, check_finite=False, lam=1,
+                 diff_order=2):
+        """
+        Initializes the penalized spline by calculating the basis and penalty.
+
+        Parameters
+        ----------
+        x : array-like, shape (N,)
+            The x-values for the spline.
+        z : array-like, shape (L,)
+            The z-values for the spline.
+        num_knots : int or Sequence(int, int), optional
+            The number of internal knots for the spline, including the endpoints.
+            Default is 100.
+        spline_degree : int or Sequence(int, int), optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        check_finite : bool, optional
+            If True, will raise an error if any values in `x` are not finite. Default
+            is False, which skips the check.
+        lam : float or Sequence(float, float), optional
+            The penalty factor applied to the difference matrix. Larger values produce
+            smoother results. Must be greater than 0. Default is 1.
+        diff_order : int or Sequence(int, int), optional
+            The difference order of the penalty. Default is 2 (second order difference).
+
+        Raises
+        ------
+        ValueError
+            Raised if `spline_degree` is less than 0 or if `diff_order` is less than 1
+            or greater than or equal to the number of spline basis functions
+            (``num_knots + spline_degree - 1``).
+
+        """
+        self.x = _check_array(x, dtype=float, check_finite=check_finite, ensure_1d=True)
+        self.z = _check_array(z, dtype=float, check_finite=check_finite, ensure_1d=True)
+        self.shape = (len(x), len(z))
+
+        self.num_knots = _check_scalar(num_knots, 2, True)[0]
+        self.diff_order = _check_scalar(diff_order, 2, True)[0]
+        self.spline_degree = _check_scalar(spline_degree, 2, True)[0]
+        self.lam = [_check_lam(val) for val in _check_scalar(lam, 2, True)[0]]
+
+        self.knots_1 = _spline_knots(self.x, self.num_knots[0], self.spline_degree[0], True)
+        self.basis_1 = _spline_basis(self.x, self.knots_1, self.spline_degree[0])
+
+        self.knots_2 = _spline_knots(self.z, self.num_knots[1], self.spline_degree[1], True)
+        self.basis_2 = _spline_basis(self.z, self.knots_2, self.spline_degree[1])
+        self._num_bases = np.array([self.basis_1.shape[1], self.basis_2.shape[1]])
+
+        self.basis = sparse.kron(self.basis_2, self.basis_1)
+        self.coef = None
+
+        D1 = difference_matrix(self._num_bases[0], self.diff_order[0])
+        D2 = difference_matrix(self._num_bases[1], self.diff_order[1])
+
+        P1 = self.lam[0] * sparse.kron(D1.T @ D1, sparse.identity(self._num_bases[1]))
+        P2 = self.lam[1] * sparse.kron(sparse.identity(self._num_bases[0]), D2.T @ D2)
+        self.penalty = P1 + P2
+
+        if (self.diff_order >= self._num_bases).any():
+            raise ValueError((
+                'the difference order must be less than the number of basis '
+                'functions, which is the number of knots + spline degree - 1'
+            ))
+        elif (self.spline_degree < 0).any():
+            raise ValueError('spline degree must be greater than or equal to 0')
+
+    def same_basis(self, num_knots=100, spline_degree=3):
+        """
+        Sees if the current basis is equivalent to the input number of knots of spline degree.
+
+        Parameters
+        ----------
+        num_knots : int, optional
+            The number of knots for the new spline. Default is 100.
+        spline_degree : int, optional
+            The degree of the new spline. Default is 3.
+
+        Returns
+        -------
+        bool
+            True if the input number of knots and spline degree are equivalent to the current
+            spline basis of the object.
+
+        """
+        return False  # TODO will need to check both basis matrices
+
+    def reset_penalty_diagonals(self, lam=1, diff_order=2, allow_lower=True, reverse_diags=False):
+        """
+        Resets the penalty diagonals of the system and all of the attributes.
+
+        Useful for reusing the penalty diagonals without having to recalculate the spline basis.
+
+        Parameters
+        ----------
+        lam : float, optional
+            The penalty factor applied to the difference matrix. Larger values produce
+            smoother results. Must be greater than 0. Default is 1.
+        diff_order : int, optional
+            The difference order of the penalty. Default is 2 (second order difference).
+        allow_lower : bool, optional
+            If True (default), will allow only using the lower bands of the penalty matrix,
+            which allows using :func:`scipy.linalg.solveh_banded` instead of the slightly
+            slower :func:`scipy.linalg.solve_banded`.
+        reverse_diags : bool, optional
+            If True, will reverse the order of the diagonals of the squared difference
+            matrix. If False (default), will never reverse the diagonals.
+
+        Notes
+        -----
+        `allow_pentapy` is always set to False since the time needed to go from a lower to full
+        banded matrix and shifting the rows removes any speedup from using pentapy's solver. It
+        also reduces the complexity of setting up the equations.
+
+        Adds padding to the penalty diagonals to accomodate the different shapes of the spline
+        basis and the penalty to speed up calculations when the two are added.
+
+        """
+
+    def solve_pspline(self, y, weights, penalty=None, rhs_extra=None):
+        """
+        Solves the coefficients for a weighted penalized spline.
+
+        Solves the linear equation ``(B.T @ W @ B + P) c = B.T @ W @ y`` for the spline
+        coefficients, `c`, given the spline basis, `B`, the weights (diagonal of `W`), the
+        penalty `P`, and `y`, and returns the resulting spline, ``B @ c``. Attempts to
+        calculate ``B.T @ W @ B`` and ``B.T @ W @ y`` as a banded system to speed up
+        the calculation.
+
+        Parameters
+        ----------
+        y : numpy.ndarray, shape (N,)
+            The y-values for fitting the spline.
+        weights : numpy.ndarray, shape (N,)
+            The weights for each y-value.
+        penalty : numpy.ndarray, shape (D, N)
+            The finite difference penalty matrix, in LAPACK's lower banded format (see
+            :func:`scipy.linalg.solveh_banded`) if `lower_only` is True or the full banded
+            format (see :func:`scipy.linalg.solve_banded`) if `lower_only` is False.
+        rhs_extra : float or numpy.ndarray, shape (N,), optional
+            If supplied, `rhs_extra` will be added to the right hand side (``B.T @ W @ y``)
+            of the equation before solving. Default is None, which adds nothing.
+
+        Returns
+        -------
+        numpy.ndarray, shape (N,)
+            The spline, corresponding to ``B @ c``, where `c` are the solved spline
+            coefficients and `B` is the spline basis.
+
+        """
+        # TODO investigate whether the other algorithm in Eilers's paper is more efficient
+        # memory- or time-wise
+        CWT = self.basis.multiply(
+            np.repeat(
+                weights, self._num_bases[0] * self._num_bases[1]
+            ).reshape(self.shape[0] * self.shape[1], -1)
+        ).T
+        CWC = CWT @ self.basis
+        CWy = CWT @ y
+
+        self.coef = spsolve(CWC + self.penalty, CWy)
+
+        return self.basis @ self.coef
diff --git a/pybaselines/two_d/api.py b/pybaselines/two_d/api.py
@@ -9,10 +9,11 @@
 from .morphological import _Morphological
 from .polynomial import _Polynomial
 from .smooth import _Smooth
+from .spline import _Spline
 
 
 class Baseline2D(
-    _Morphological, _Polynomial, _Smooth
+    _Morphological, _Polynomial, _Smooth, _Spline
 ):
     """
     A class for all 2D baseline correction algorithms.
@@ -26,7 +27,7 @@ class Baseline2D(
         The x-values of the measured data. Default is None, which will create an
         array from -1 to 1 during the first function call with length equal to the
         input data length.
-    z_data : array-like, shape (N,), optional
+    z_data : array-like, shape (L,), optional
         The z-values of the measured data. Default is None, which will create an
         array from -1 to 1 during the first function call with length equal to the
         input data length.

diff --git a/pybaselines/two_d/spline.py b/pybaselines/two_d/spline.py
@@ -0,0 +1,570 @@
+# -*- coding: utf-8 -*-
+"""Functions for fitting baselines using splines.
+
+Created on April 25, 2023
+@author: Donald Erb
+
+"""
+
+import warnings
+
+import numpy as np
+
+from .. import _weighting
+from ..utils import ParameterWarning, relative_difference
+from ._algorithm_setup import _Algorithm2D
+
+
+class _Spline(_Algorithm2D):
+    """A base class for all spline algorithms."""
+
+    @_Algorithm2D._register(
+        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
+    )
+    def irsqr(self, data, lam=1e3, quantile=0.05, num_knots=25, spline_degree=3,
+              diff_order=3, max_iter=100, tol=1e-6, weights=None, eps=None):
+        """
+        Iterative Reweighted Spline Quantile Regression (IRSQR).
+
+        Fits the baseline using quantile regression with penalized splines.
+
+        Parameters
+        ----------
+        data : array-like, shape (N,)
+            The y-values of the measured data, with N data points. Must not
+            contain missing data (NaN) or Inf.
+        lam : float, optional
+            The smoothing parameter. Larger values will create smoother baselines.
+            Default is 1e3.
+        quantile : float, optional
+            The quantile at which to fit the baseline. Default is 0.05.
+        num_knots : int, optional
+            The number of knots for the spline. Default is 25.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        diff_order : int, optional
+            The order of the differential matrix. Must be greater than 0. Default is 3
+            (third order differential matrix). Typical values are 3, 2, or 1.
+        max_iter : int, optional
+            The max number of fit iterations. Default is 100.
+        tol : float, optional
+            The exit criteria. Default is 1e-6.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then the initial weights
+            will be an array with size equal to N and all values set to 1.
+        eps : float, optional
+            A small value added to the square of the residual to prevent dividing by 0.
+            Default is None, which uses the square of the maximum-absolute-value of the
+            fit each iteration multiplied by 1e-6.
+
+        Returns
+        -------
+        baseline : numpy.ndarray, shape (N,)
+            The calculated baseline.
+        params : dict
+            A dictionary with the following items:
+
+            * 'weights': numpy.ndarray, shape (N,)
+                The weight array used for fitting the data.
+            * 'tol_history': numpy.ndarray
+                An array containing the calculated tolerance values for
+                each iteration. The length of the array is the number of iterations
+                completed. If the last value in the array is greater than the input
+                `tol` value, then the function did not converge.
+
+        Raises
+        ------
+        ValueError
+            Raised if quantile is not between 0 and 1.
+
+        References
+        ----------
+        Han, Q., et al. Iterative Reweighted Quantile Regression Using Augmented Lagrangian
+        Optimization for Baseline Correction. 2018 5th International Conference on Information
+        Science and Control Engineering (ICISCE), 2018, 280-284.
+
+        """
+        if not 0 < quantile < 1:
+            raise ValueError('quantile must be between 0 and 1')
+
+        y, weight_array = self._setup_spline(
+            data, weights, spline_degree, num_knots, True, diff_order, lam
+        )
+        old_coef = np.zeros(self.pspline._num_bases[0] * self.pspline._num_bases[1])
+        tol_history = np.empty(max_iter + 1)
+        for i in range(max_iter + 1):
+            baseline = self.pspline.solve_pspline(y, weight_array)
+            calc_difference = relative_difference(old_coef, self.pspline.coef)
+            tol_history[i] = calc_difference
+            if calc_difference < tol:
+                break
+            old_coef = self.pspline.coef
+            weight_array = _weighting._quantile(y, baseline, quantile, eps)
+
+        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
+
+        return baseline, params
+
+    @_Algorithm2D._register(
+        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
+    )
+    def pspline_asls(self, data, lam=1e3, p=1e-2, num_knots=25, spline_degree=3, diff_order=2,
+                     max_iter=50, tol=1e-3, weights=None):
+        """
+        A penalized spline version of the asymmetric least squares (AsLS) algorithm.
+
+        Parameters
+        ----------
+        data : array-like, shape (N,)
+            The y-values of the measured data, with N data points. Must not
+            contain missing data (NaN) or Inf.
+        lam : float, optional
+            The smoothing parameter. Larger values will create smoother baselines.
+            Default is 1e3.
+        p : float, optional
+            The penalizing weighting factor. Must be between 0 and 1. Values greater
+            than the baseline will be given `p` weight, and values less than the baseline
+            will be given `p - 1` weight. Default is 1e-2.
+        num_knots : int, optional
+            The number of knots for the spline. Default is 25.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        diff_order : int, optional
+            The order of the differential matrix. Must be greater than 0. Default is 2
+            (second order differential matrix). Typical values are 2 or 1.
+        max_iter : int, optional
+            The max number of fit iterations. Default is 50.
+        tol : float, optional
+            The exit criteria. Default is 1e-3.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then the initial weights
+            will be an array with size equal to N and all values set to 1.
+        x_data : array-like, shape (N,), optional
+            The x-values of the measured data. Default is None, which will create an
+            array from -1 to 1 with N points.
+
+        Returns
+        -------
+        baseline : numpy.ndarray, shape (N,)
+            The calculated baseline.
+        params : dict
+            A dictionary with the following items:
+
+            * 'weights': numpy.ndarray, shape (N,)
+                The weight array used for fitting the data.
+            * 'tol_history': numpy.ndarray
+                An array containing the calculated tolerance values for
+                each iteration. The length of the array is the number of iterations
+                completed. If the last value in the array is greater than the input
+                `tol` value, then the function did not converge.
+
+        Raises
+        ------
+        ValueError
+            Raised if `p` is not between 0 and 1.
+
+        See Also
+        --------
+        pybaselines.whittaker.asls
+
+        References
+        ----------
+        Eilers, P. A Perfect Smoother. Analytical Chemistry, 2003, 75(14), 3631-3636.
+
+        Eilers, P., et al. Baseline correction with asymmetric least squares smoothing.
+        Leiden University Medical Centre Report, 2005, 1(1).
+
+        Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary
+        Reviews: Computational Statistics, 2010, 2(6), 637-653.
+
+        """
+        if not 0 < p < 1:
+            raise ValueError('p must be between 0 and 1')
+
+        y, weight_array = self._setup_spline(
+            data, weights, spline_degree, num_knots, True, diff_order, lam
+        )
+        tol_history = np.empty(max_iter + 1)
+        for i in range(max_iter + 1):
+            baseline = self.pspline.solve_pspline(y, weight_array)
+            new_weights = _weighting._asls(y, baseline, p)
+            calc_difference = relative_difference(weight_array, new_weights)
+            tol_history[i] = calc_difference
+            if calc_difference < tol:
+                break
+            weight_array = new_weights
+
+        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
+
+        return baseline, params
+
+    @_Algorithm2D._register(
+        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
+    )
+    def pspline_airpls(self, data, lam=1e3, num_knots=25, spline_degree=3,
+                       diff_order=2, max_iter=50, tol=1e-3, weights=None):
+        """
+        A penalized spline version of the airPLS algorithm.
+
+        Parameters
+        ----------
+        data : array-like, shape (N,)
+            The y-values of the measured data, with N data points. Must not
+            contain missing data (NaN) or Inf.
+        lam : float, optional
+            The smoothing parameter. Larger values will create smoother baselines.
+            Default is 1e3.
+        num_knots : int, optional
+            The number of knots for the spline. Default is 25.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        diff_order : int, optional
+            The order of the differential matrix. Must be greater than 0. Default is 2
+            (second order differential matrix). Typical values are 2 or 1.
+        max_iter : int, optional
+            The max number of fit iterations. Default is 50.
+        tol : float, optional
+            The exit criteria. Default is 1e-3.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then the initial weights
+            will be an array with size equal to N and all values set to 1.
+
+        Returns
+        -------
+        baseline : numpy.ndarray, shape (N,)
+            The calculated baseline.
+        params : dict
+            A dictionary with the following items:
+
+            * 'weights': numpy.ndarray, shape (N,)
+                The weight array used for fitting the data.
+            * 'tol_history': numpy.ndarray
+                An array containing the calculated tolerance values for
+                each iteration. The length of the array is the number of iterations
+                completed. If the last value in the array is greater than the input
+                `tol` value, then the function did not converge.
+
+        See Also
+        --------
+        pybaselines.whittaker.airpls
+
+        References
+        ----------
+        Zhang, Z.M., et al. Baseline correction using adaptive iteratively
+        reweighted penalized least squares. Analyst, 2010, 135(5), 1138-1146.
+
+        Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary
+        Reviews: Computational Statistics, 2010, 2(6), 637-653.
+
+        """
+        y, weight_array = self._setup_spline(
+            data, weights, spline_degree, num_knots, True, diff_order, lam, copy_weights=True
+        )
+
+        y_l1_norm = np.abs(y).sum()
+        tol_history = np.empty(max_iter + 1)
+        for i in range(1, max_iter + 2):
+            try:
+                output = self.pspline.solve_pspline(y, weight_array)
+            except np.linalg.LinAlgError:
+                warnings.warn(
+                    ('error occurred during fitting, indicating that "tol"'
+                     ' is too low, "max_iter" is too high, or "lam" is too high'),
+                    ParameterWarning
+                )
+                i -= 1  # reduce i so that output tol_history indexing is correct
+                break
+            else:
+                baseline = output
+
+            residual = y - baseline
+            neg_mask = residual < 0
+            neg_residual = residual[neg_mask]
+            if len(neg_residual) < 2:
+                # exit if there are < 2 negative residuals since all points or all but one
+                # point would get a weight of 0, which fails the solver
+                warnings.warn(
+                    ('almost all baseline points are below the data, indicating that "tol"'
+                     ' is too low and/or "max_iter" is too high'), ParameterWarning
+                )
+                i -= 1  # reduce i so that output tol_history indexing is correct
+                break
+
+            residual_l1_norm = abs(neg_residual.sum())
+            calc_difference = residual_l1_norm / y_l1_norm
+            tol_history[i - 1] = calc_difference
+            if calc_difference < tol:
+                break
+            # only use negative residual in exp to avoid exponential overflow warnings
+            # and accidently creating a weight of nan (inf * 0 = nan)
+            weight_array[neg_mask] = np.exp(i * neg_residual / residual_l1_norm)
+            weight_array[~neg_mask] = 0
+
+        params = {'weights': weight_array, 'tol_history': tol_history[:i]}
+
+        return baseline, params
+
+    @_Algorithm2D._register(
+        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
+    )
+    def pspline_arpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order=2,
+                      max_iter=50, tol=1e-3, weights=None):
+        """
+        A penalized spline version of the arPLS algorithm.
+
+        Parameters
+        ----------
+        data : array-like, shape (N,)
+            The y-values of the measured data, with N data points. Must not
+            contain missing data (NaN) or Inf.
+        lam : float, optional
+            The smoothing parameter. Larger values will create smoother baselines.
+            Default is 1e3.
+        num_knots : int, optional
+            The number of knots for the spline. Default is 25.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        diff_order : int, optional
+            The order of the differential matrix. Must be greater than 0. Default is 2
+            (second order differential matrix). Typical values are 2 or 1.
+        max_iter : int, optional
+            The max number of fit iterations. Default is 50.
+        tol : float, optional
+            The exit criteria. Default is 1e-3.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then the initial weights
+            will be an array with size equal to N and all values set to 1.
+
+        Returns
+        -------
+        baseline : numpy.ndarray, shape (N,)
+            The calculated baseline.
+        params : dict
+            A dictionary with the following items:
+
+            * 'weights': numpy.ndarray, shape (N,)
+                The weight array used for fitting the data.
+            * 'tol_history': numpy.ndarray
+                An array containing the calculated tolerance values for
+                each iteration. The length of the array is the number of iterations
+                completed. If the last value in the array is greater than the input
+                `tol` value, then the function did not converge.
+
+        See Also
+        --------
+        pybaselines.whittaker.arpls
+
+        References
+        ----------
+        Baek, S.J., et al. Baseline correction using asymmetrically reweighted
+        penalized least squares smoothing. Analyst, 2015, 140, 250-257.
+
+        Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary
+        Reviews: Computational Statistics, 2010, 2(6), 637-653.
+
+        """
+        y, weight_array = self._setup_spline(
+            data, weights, spline_degree, num_knots, True, diff_order, lam
+        )
+        tol_history = np.empty(max_iter + 1)
+        for i in range(max_iter + 1):
+            baseline = self.pspline.solve_pspline(y, weight_array)
+            new_weights = _weighting._arpls(y, baseline)
+            calc_difference = relative_difference(weight_array, new_weights)
+            tol_history[i] = calc_difference
+            if calc_difference < tol:
+                break
+            weight_array = new_weights
+
+        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
+
+        return baseline, params
+
+    @_Algorithm2D._register(
+        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
+    )
+    def pspline_iarpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order=2,
+                       max_iter=50, tol=1e-3, weights=None):
+        """
+        A penalized spline version of the IarPLS algorithm.
+
+        Parameters
+        ----------
+        data : array-like, shape (N,)
+            The y-values of the measured data, with N data points. Must not
+            contain missing data (NaN) or Inf.
+        lam : float, optional
+            The smoothing parameter. Larger values will create smoother baselines.
+            Default is 1e3.
+        num_knots : int, optional
+            The number of knots for the spline. Default is 25.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        diff_order : int, optional
+            The order of the differential matrix. Must be greater than 0. Default is 2
+            (second order differential matrix). Typical values are 2 or 1.
+        max_iter : int, optional
+            The max number of fit iterations. Default is 50.
+        tol : float, optional
+            The exit criteria. Default is 1e-3.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then the initial weights
+            will be an array with size equal to N and all values set to 1.
+        x_data : array-like, shape (N,), optional
+            The x-values of the measured data. Default is None, which will create an
+            array from -1 to 1 with N points.
+
+        Returns
+        -------
+        baseline : numpy.ndarray, shape (N,)
+            The calculated baseline.
+        params : dict
+            A dictionary with the following items:
+
+            * 'weights': numpy.ndarray, shape (N,)
+                The weight array used for fitting the data.
+            * 'tol_history': numpy.ndarray
+                An array containing the calculated tolerance values for
+                each iteration. The length of the array is the number of iterations
+                completed. If the last value in the array is greater than the input
+                `tol` value, then the function did not converge.
+
+        See Also
+        --------
+        pybaselines.whittaker.iarpls
+
+        References
+        ----------
+        Ye, J., et al. Baseline correction method based on improved asymmetrically
+        reweighted penalized least squares for Raman spectrum. Applied Optics, 2020,
+        59, 10933-10943.
+
+        Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary
+        Reviews: Computational Statistics, 2010, 2(6), 637-653.
+
+        """
+        y, weight_array = self._setup_spline(
+            data, weights, spline_degree, num_knots, True, diff_order, lam
+        )
+        tol_history = np.empty(max_iter + 1)
+        for i in range(1, max_iter + 2):
+            baseline = self.pspline.solve_pspline(y, weight_array)
+            new_weights = _weighting._iarpls(y, baseline, i)
+            calc_difference = relative_difference(weight_array, new_weights)
+            tol_history[i - 1] = calc_difference
+            if not np.isfinite(calc_difference):
+                # catches nan, inf and -inf due to exp(i) being too high or if there
+                # are too few negative residuals; no way to catch both conditions before
+                # new_weights calculation since it is hard to estimate if
+                # (exp(i) / std) * residual will overflow; check calc_difference rather
+                # than checking new_weights since non-finite values rarely occur and
+                # checking a scalar is faster; cannot use np.errstate since it is not 100% reliable
+                warnings.warn(
+                    ('nan and/or +/- inf occurred in weighting calculation, likely meaning '
+                     '"tol" is too low and/or "max_iter" is too high'), ParameterWarning
+                )
+                break
+            elif calc_difference < tol:
+                break
+            weight_array = new_weights
+
+        params = {'weights': weight_array, 'tol_history': tol_history[:i]}
+
+        return baseline, params
+
+    @_Algorithm2D._register(
+        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
+    )
+    def pspline_psalsa(self, data, lam=1e3, p=0.5, k=None, num_knots=25, spline_degree=3,
+                       diff_order=2, max_iter=50, tol=1e-3, weights=None):
+        """
+        A penalized spline version of the psalsa algorithm.
+
+        Parameters
+        ----------
+        data : array-like, shape (N,)
+            The y-values of the measured data, with N data points. Must not
+            contain missing data (NaN) or Inf.
+        lam : float, optional
+            The smoothing parameter. Larger values will create smoother baselines.
+            Default is 1e3.
+        p : float, optional
+            The penalizing weighting factor. Must be between 0 and 1. Values greater
+            than the baseline will be given `p` weight, and values less than the baseline
+            will be given `p - 1` weight. Default is 0.5.
+        k : float, optional
+            A factor that controls the exponential decay of the weights for baseline
+            values greater than the data. Should be approximately the height at which
+            a value could be considered a peak. Default is None, which sets `k` to
+            one-tenth of the standard deviation of the input data. A large k value
+            will produce similar results to :meth:`.asls`.
+        num_knots : int, optional
+            The number of knots for the spline. Default is 25.
+        spline_degree : int, optional
+            The degree of the spline. Default is 3, which is a cubic spline.
+        diff_order : int, optional
+            The order of the differential matrix. Must be greater than 0. Default is 2
+            (second order differential matrix). Typical values are 2 or 1.
+        max_iter : int, optional
+            The max number of fit iterations. Default is 50.
+        tol : float, optional
+            The exit criteria. Default is 1e-3.
+        weights : array-like, shape (N,), optional
+            The weighting array. If None (default), then the initial weights
+            will be an array with size equal to N and all values set to 1.
+
+        Returns
+        -------
+        baseline : numpy.ndarray, shape (N,)
+            The calculated baseline.
+        params : dict
+            A dictionary with the following items:
+
+            * 'weights': numpy.ndarray, shape (N,)
+                The weight array used for fitting the data.
+            * 'tol_history': numpy.ndarray
+                An array containing the calculated tolerance values for
+                each iteration. The length of the array is the number of iterations
+                completed. If the last value in the array is greater than the input
+                `tol` value, then the function did not converge.
+
+        Raises
+        ------
+        ValueError
+            Raised if `p` is not between 0 and 1.
+
+        See Also
+        --------
+        pybaselines.whittaker.psalsa
+
+        References
+        ----------
+        Oller-Moreno, S., et al. Adaptive Asymmetric Least Squares baseline estimation
+        for analytical instruments. 2014 IEEE 11th International Multi-Conference on
+        Systems, Signals, and Devices, 2014, 1-5.
+
+        Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary
+        Reviews: Computational Statistics, 2010, 2(6), 637-653.
+
+        """
+        if not 0 < p < 1:
+            raise ValueError('p must be between 0 and 1')
+
+        y, weight_array = self._setup_spline(
+            data, weights, spline_degree, num_knots, True, diff_order, lam
+        )
+        if k is None:
+            k = np.std(y) / 10
+        tol_history = np.empty(max_iter + 1)
+        for i in range(max_iter + 1):
+            baseline = self.pspline.solve_pspline(y, weight_array)
+            new_weights = _weighting._psalsa(y, baseline, p, k, len(y))  # TODO replace len(y) with self._shape or whatever
+            calc_difference = relative_difference(weight_array, new_weights)
+            tol_history[i] = calc_difference
+            if calc_difference < tol:
+                break
+            weight_array = new_weights
+
+        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
+
+        return baseline, params