diff --git a/chemotools/baseline/__init__.py b/chemotools/baseline/__init__.py index a80779a..a78be79 100644 --- a/chemotools/baseline/__init__.py +++ b/chemotools/baseline/__init__.py @@ -1,8 +1,8 @@ -from .air_pls import AirPls -from .ar_pls import ArPls -from .constant_baseline_correction import ConstantBaselineCorrection -from .cubic_spline_correction import CubicSplineCorrection -from .linear_correction import LinearCorrection -from .non_negative import NonNegative -from .polynomial_correction import PolynomialCorrection -from .subtract_reference import SubtractReference \ No newline at end of file +from ._air_pls import AirPls +from ._ar_pls import ArPls +from ._constant_baseline_correction import ConstantBaselineCorrection +from ._cubic_spline_correction import CubicSplineCorrection +from ._linear_correction import LinearCorrection +from ._non_negative import NonNegative +from ._polynomial_correction import PolynomialCorrection +from ._subtract_reference import SubtractReference \ No newline at end of file diff --git a/chemotools/baseline/air_pls.py b/chemotools/baseline/_air_pls.py similarity index 92% rename from chemotools/baseline/air_pls.py rename to chemotools/baseline/_air_pls.py index ace4efd..7e82cfa 100644 --- a/chemotools/baseline/air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -30,14 +30,6 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): The number of iterations used to calculate the baseline. Increasing the number of iterations can improve the accuracy of the baseline correction, but also increases the computation time. - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - A flag indicating whether the estimator has been fitted to data. - Methods ------- fit(X, y=None) @@ -85,13 +77,7 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": Returns the instance itself. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -113,7 +99,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/baseline/ar_pls.py b/chemotools/baseline/_ar_pls.py similarity index 91% rename from chemotools/baseline/ar_pls.py rename to chemotools/baseline/_ar_pls.py index e4e0173..064621d 100644 --- a/chemotools/baseline/ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -29,13 +29,6 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): nr_iterations : int, optional (default=100) The maximum number of iterations for the weight updating scheme. - Attributes - ---------- - n_features_in_ : int - The number of input features. - - _is_fitted : bool - Whether the estimator has been fitted. Methods ------- @@ -86,13 +79,7 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -114,7 +101,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/baseline/constant_baseline_correction.py b/chemotools/baseline/_constant_baseline_correction.py similarity index 89% rename from chemotools/baseline/constant_baseline_correction.py rename to chemotools/baseline/_constant_baseline_correction.py index 93e7c2a..f1bf33f 100644 --- a/chemotools/baseline/constant_baseline_correction.py +++ b/chemotools/baseline/_constant_baseline_correction.py @@ -30,12 +30,6 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme end_index_ : int The index of the end of the range. It is 1 if the wavenumbers are not provided. - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -46,7 +40,10 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme """ def __init__( - self, start: int = 0, end: int = 1, wavenumbers: np.ndarray = None, + self, + start: int = 0, + end: int = 1, + wavenumbers: np.ndarray = None, ) -> None: self.start = start self.end = end @@ -70,13 +67,7 @@ def fit(self, X: np.ndarray, y=None) -> "ConstantBaselineCorrection": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) # Set the start and end indices if self.wavenumbers is None: @@ -109,7 +100,7 @@ def transform(self, X: np.ndarray, y=0, copy=True) -> np.ndarray: The transformed input data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, ["start_index_", "end_index_"]) # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/baseline/cubic_spline_correction.py b/chemotools/baseline/_cubic_spline_correction.py similarity index 86% rename from chemotools/baseline/cubic_spline_correction.py rename to chemotools/baseline/_cubic_spline_correction.py index 5b95259..daa1b66 100644 --- a/chemotools/baseline/cubic_spline_correction.py +++ b/chemotools/baseline/_cubic_spline_correction.py @@ -5,9 +5,10 @@ from chemotools.utils.check_inputs import check_input + class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): """ - A transformer that corrects a baseline by subtracting a cubic spline through the + A transformer that corrects a baseline by subtracting a cubic spline through the points defined by the indices. Parameters @@ -32,6 +33,7 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi Transform the input data by subtracting the constant baseline value. """ + def __init__(self, indices: list = None) -> None: self.indices = indices @@ -53,13 +55,7 @@ def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) if self.indices is None: self.indices_ = [0, len(X[0]) - 1] @@ -89,7 +85,7 @@ def transform(self, X: np.ndarray, y=None, copy=True): The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "indices_") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -97,7 +93,9 @@ def transform(self, X: np.ndarray, y=None, copy=True): # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) # Calculate spline baseline correction for i, x in enumerate(X_): @@ -106,7 +104,7 @@ def transform(self, X: np.ndarray, y=None, copy=True): def _spline_baseline_correct(self, x: np.ndarray) -> np.ndarray: indices = self.indices_ - intensity = x[indices] + intensity = x[indices] spl = CubicSpline(indices, intensity) - baseline = spl(range(len(x))) - return x - baseline \ No newline at end of file + baseline = spl(range(len(x))) + return x - baseline diff --git a/chemotools/baseline/linear_correction.py b/chemotools/baseline/_linear_correction.py similarity index 86% rename from chemotools/baseline/linear_correction.py rename to chemotools/baseline/_linear_correction.py index 2e7a971..bfe3fd7 100644 --- a/chemotools/baseline/linear_correction.py +++ b/chemotools/baseline/_linear_correction.py @@ -10,17 +10,6 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): A transformer that corrects a baseline by subtracting a linear baseline through the initial and final points of the spectrum. - Parameters - ---------- - - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -68,13 +57,7 @@ def fit(self, X: np.ndarray, y=None) -> "LinearCorrection": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -99,7 +82,7 @@ def transform(self, X: np.ndarray, y=0, copy=True) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/baseline/non_negative.py b/chemotools/baseline/_non_negative.py similarity index 85% rename from chemotools/baseline/non_negative.py rename to chemotools/baseline/_non_negative.py index 927401b..f082761 100644 --- a/chemotools/baseline/non_negative.py +++ b/chemotools/baseline/_non_negative.py @@ -14,14 +14,6 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): mode : str, optional The mode to use for the non-negative values. Can be "zero" or "abs". - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -52,13 +44,7 @@ def fit(self, X: np.ndarray, y=None) -> "NonNegative": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -80,7 +66,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/baseline/polynomial_correction.py b/chemotools/baseline/_polynomial_correction.py similarity index 83% rename from chemotools/baseline/polynomial_correction.py rename to chemotools/baseline/_polynomial_correction.py index 354feb7..dad997a 100644 --- a/chemotools/baseline/polynomial_correction.py +++ b/chemotools/baseline/_polynomial_correction.py @@ -4,9 +4,10 @@ from chemotools.utils.check_inputs import check_input + class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): """ - A transformer that subtracts a polynomial baseline from the input data. The polynomial is + A transformer that subtracts a polynomial baseline from the input data. The polynomial is fitted to the points in the spectrum specified by the indices parameter. Parameters @@ -18,14 +19,6 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin The indices of the points in the spectrum to fit the polynomial to. Defaults to None, which fits the polynomial to all points in the spectrum (equivalent to detrend). - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -37,6 +30,7 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin _baseline_correct_spectrum(x) Subtract the polynomial baseline from a single spectrum. """ + def __init__(self, order: int = 1, indices: list = None) -> None: self.order = order self.indices = indices @@ -59,13 +53,7 @@ def fit(self, X: np.ndarray, y=None) -> "PolynomialCorrection": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) if self.indices is None: self.indices_ = range(0, len(X[0])) @@ -73,8 +61,8 @@ def fit(self, X: np.ndarray, y=None) -> "PolynomialCorrection": self.indices_ = self.indices return self - - def transform(self, X: np.ndarray, y:int=0, copy:bool=True) -> np.ndarray: + + def transform(self, X: np.ndarray, y: int = 0, copy: bool = True) -> np.ndarray: """ Transform the input data by subtracting the polynomial baseline. @@ -95,7 +83,7 @@ def transform(self, X: np.ndarray, y:int=0, copy:bool=True) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "indices_") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -103,13 +91,15 @@ def transform(self, X: np.ndarray, y:int=0, copy:bool=True) -> np.ndarray: # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) # Calculate polynomial baseline correction for i, x in enumerate(X_): X_[i] = self._baseline_correct_spectrum(x) return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - + def _baseline_correct_spectrum(self, x: np.ndarray) -> np.ndarray: """ Subtract the polynomial baseline from a single spectrum. @@ -126,5 +116,5 @@ def _baseline_correct_spectrum(self, x: np.ndarray) -> np.ndarray: """ intensity = x[self.indices_] poly = np.polyfit(self.indices_, intensity, self.order) - baseline = [np.polyval(poly, i) for i in range(0, len(x))] - return x - baseline \ No newline at end of file + baseline = [np.polyval(poly, i) for i in range(0, len(x))] + return x - baseline diff --git a/chemotools/baseline/subtract_reference.py b/chemotools/baseline/_subtract_reference.py similarity index 85% rename from chemotools/baseline/subtract_reference.py rename to chemotools/baseline/_subtract_reference.py index 33eff64..3503823 100644 --- a/chemotools/baseline/subtract_reference.py +++ b/chemotools/baseline/_subtract_reference.py @@ -15,14 +15,6 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): The reference spectrum to subtract from the input data. If None, the original spectrum is returned. - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -34,6 +26,7 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): _subtract_reference(x) Subtract the reference spectrum from a single spectrum. """ + def __init__( self, reference: np.ndarray = None, @@ -58,20 +51,13 @@ def fit(self, X: np.ndarray, y=None) -> "SubtractReference": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) # Set the reference - if self.reference is not None: self.reference_ = self.reference.copy() return self - + return self def transform(self, X: np.ndarray, y=None) -> np.ndarray: @@ -92,7 +78,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -100,7 +86,9 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) if self.reference is None: return X_.reshape(-1, 1) if X_.ndim == 1 else X_ diff --git a/chemotools/derivative/__init__.py b/chemotools/derivative/__init__.py index a08dc2d..1df25f5 100644 --- a/chemotools/derivative/__init__.py +++ b/chemotools/derivative/__init__.py @@ -1,2 +1,2 @@ -from .norris_william import NorrisWilliams -from .savitzky_golay import SavitzkyGolay \ No newline at end of file +from ._norris_william import NorrisWilliams +from ._savitzky_golay import SavitzkyGolay \ No newline at end of file diff --git a/chemotools/derivative/norris_william.py b/chemotools/derivative/_norris_william.py similarity index 91% rename from chemotools/derivative/norris_william.py rename to chemotools/derivative/_norris_william.py index ebf8016..be25ec4 100644 --- a/chemotools/derivative/norris_william.py +++ b/chemotools/derivative/_norris_william.py @@ -22,17 +22,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): The order of the derivative to calculate. Can be 1 or 2. Default is 1. mode : str, optional - The mode to use for the derivative calculation. Can be "nearest", "constant", + The mode to use for the derivative calculation. Can be "nearest", "constant", "reflect", "wrap", "mirror" or "interp". Default is "nearest". - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -41,6 +33,7 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): transform(X, y=0, copy=True) Transform the input data by calculating the Norris-Williams derivative. """ + def __init__( self, window_size: int = 5, @@ -71,13 +64,7 @@ def fit(self, X: np.ndarray, y=None) -> "NorrisWilliams": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -99,7 +86,7 @@ def transform(self, X: np.ndarray, y=None): The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/derivative/savitzky_golay.py b/chemotools/derivative/_savitzky_golay.py similarity index 89% rename from chemotools/derivative/savitzky_golay.py rename to chemotools/derivative/_savitzky_golay.py index 7e94f20..f9a8e6c 100644 --- a/chemotools/derivative/savitzky_golay.py +++ b/chemotools/derivative/_savitzky_golay.py @@ -27,14 +27,6 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): The mode to use for the derivative calculation. Can be "nearest", "constant", "reflect", "wrap", "mirror" or "interp". Default is "nearest". - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -74,13 +66,7 @@ def fit(self, X: np.ndarray, y=None) -> "SavitzkyGolay": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -102,7 +88,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/feature_selection/_index_selector.py b/chemotools/feature_selection/_index_selector.py index 3523c41..ed85750 100644 --- a/chemotools/feature_selection/_index_selector.py +++ b/chemotools/feature_selection/_index_selector.py @@ -31,12 +31,6 @@ class IndexSelector(BaseEstimator, SelectorMixin): features_index_ : int The index of the features to select. - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -96,7 +90,7 @@ def _get_support_mask(self): Returns ------- - mask : ndarray of shape (n_features,) + mask : ndarray of shape (n_features_in_,) The mask indicating the selected features. """ # Check that the estimator is fitted diff --git a/chemotools/scale/__init__.py b/chemotools/scale/__init__.py index 9630c8c..64e200f 100644 --- a/chemotools/scale/__init__.py +++ b/chemotools/scale/__init__.py @@ -1,3 +1,3 @@ -from .min_max_scaler import MinMaxScaler -from .norm_scaler import NormScaler -from .point_scaler import PointScaler +from ._min_max_scaler import MinMaxScaler +from ._norm_scaler import NormScaler +from ._point_scaler import PointScaler diff --git a/chemotools/scale/min_max_scaler.py b/chemotools/scale/_min_max_scaler.py similarity index 80% rename from chemotools/scale/min_max_scaler.py rename to chemotools/scale/_min_max_scaler.py index c1e8b0e..95e5b89 100644 --- a/chemotools/scale/min_max_scaler.py +++ b/chemotools/scale/_min_max_scaler.py @@ -8,23 +8,15 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): """ A transformer that scales the input data by subtracting the minimum and dividing by - the difference between the maximum and the minimum. When the use_min parameter is False, + the difference between the maximum and the minimum. When the use_min parameter is False, the data is scaled by the maximum. Parameters ---------- use_min : bool, default=True - The normalization to use. If True, the data is subtracted by the minimum and + The normalization to use. If True, the data is subtracted by the minimum and scaled by the maximum. If False, the data is scaled by the maximum. - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -55,13 +47,7 @@ def fit(self, X: np.ndarray, y=None) -> "MinMaxScaler": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -83,7 +69,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -97,8 +83,9 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: # Normalize the data by the maximum value if self.use_min: - X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (np.max( - X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)) + X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / ( + np.max(X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True) + ) else: X_ = X_ / np.max(X_, axis=1, keepdims=True) diff --git a/chemotools/scale/norm_scaler.py b/chemotools/scale/_norm_scaler.py similarity index 82% rename from chemotools/scale/norm_scaler.py rename to chemotools/scale/_norm_scaler.py index 865ef26..96e77ef 100644 --- a/chemotools/scale/norm_scaler.py +++ b/chemotools/scale/_norm_scaler.py @@ -12,15 +12,7 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): Parameters ---------- l_norm : int, optional - The L-norm to use. Default is 2. - - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. + The L-norm to use. Default is 2. Methods ------- @@ -30,13 +22,14 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): transform(X, y=0, copy=True) Transform the input data by scaling by the L-norm. """ + def __init__(self, l_norm: int = 2): self.l_norm = l_norm def fit(self, X: np.ndarray, y=None) -> "NormScaler": """ Fit the transformer to the input data. - + Parameters ---------- X : np.ndarray of shape (n_samples, n_features) @@ -51,13 +44,7 @@ def fit(self, X: np.ndarray, y=None) -> "NormScaler": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -79,7 +66,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/scale/point_scaler.py b/chemotools/scale/_point_scaler.py similarity index 83% rename from chemotools/scale/point_scaler.py rename to chemotools/scale/_point_scaler.py index 24c6de7..e6ff3df 100644 --- a/chemotools/scale/point_scaler.py +++ b/chemotools/scale/_point_scaler.py @@ -7,12 +7,12 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): """ - A transformer that scales the input data by the intensity value at a given point. + A transformer that scales the input data by the intensity value at a given point. The point can be specified by an index or by a wavenumber. Parameters ---------- - point : int, + point : int, The point to scale the data by. It can be an index or a wavenumber. wavenumber : array-like, optional @@ -25,12 +25,6 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): point_index_ : int The index of the point to scale the data by. It is 0 if the wavenumbers are not provided. - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -39,11 +33,11 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): transform(X, y=0, copy=True) Transform the input data by scaling by the value at a given Point. """ + def __init__(self, point: int = 0, wavenumbers: np.ndarray = None): self.point = point self.wavenumbers = wavenumbers - def fit(self, X: np.ndarray, y=None) -> "PointScaler": """ Fit the transformer to the input data. @@ -62,13 +56,7 @@ def fit(self, X: np.ndarray, y=None) -> "PointScaler": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) # Set the point index if self.wavenumbers is None: @@ -76,7 +64,6 @@ def fit(self, X: np.ndarray, y=None) -> "PointScaler": else: self.point_index_ = self._find_index(self.point) - return self def transform(self, X: np.ndarray, y=None) -> np.ndarray: @@ -97,7 +84,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "point_index_") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -105,14 +92,16 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) # Scale the data by Point for i, x in enumerate(X_): X_[i] = x / x[self.point_index_] - + return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - + def _find_index(self, target: float) -> int: wavenumbers = np.array(self.wavenumbers) - return np.argmin(np.abs(wavenumbers - target)) \ No newline at end of file + return np.argmin(np.abs(wavenumbers - target)) diff --git a/chemotools/scatter/__init__.py b/chemotools/scatter/__init__.py index ab065a6..b622efe 100644 --- a/chemotools/scatter/__init__.py +++ b/chemotools/scatter/__init__.py @@ -1,4 +1,4 @@ -from .extended_multiplicative_scatter_correction import ExtendedMultiplicativeScatterCorrection -from .multiplicative_scatter_correction import MultiplicativeScatterCorrection -from .robust_normal_variate import RobustNormalVariate -from .standard_normal_variate import StandardNormalVariate \ No newline at end of file +from ._extended_multiplicative_scatter_correction import ExtendedMultiplicativeScatterCorrection +from ._multiplicative_scatter_correction import MultiplicativeScatterCorrection +from ._robust_normal_variate import RobustNormalVariate +from ._standard_normal_variate import StandardNormalVariate \ No newline at end of file diff --git a/chemotools/scatter/extended_multiplicative_scatter_correction.py b/chemotools/scatter/_extended_multiplicative_scatter_correction.py similarity index 95% rename from chemotools/scatter/extended_multiplicative_scatter_correction.py rename to chemotools/scatter/_extended_multiplicative_scatter_correction.py index 927d201..1a504c4 100644 --- a/chemotools/scatter/extended_multiplicative_scatter_correction.py +++ b/chemotools/scatter/_extended_multiplicative_scatter_correction.py @@ -37,8 +37,6 @@ class ExtendedMultiplicativeScatterCorrection( ---------- reference_ : np.ndarray The reference spectrum used for the correction. - n_features_in_ : int - The number of features in the training data. References ---------- @@ -82,13 +80,7 @@ def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) # Check that the length of the reference is the same as the number of features if self.reference is not None: @@ -146,7 +138,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/scatter/multiplicative_scatter_correction.py b/chemotools/scatter/_multiplicative_scatter_correction.py similarity index 96% rename from chemotools/scatter/multiplicative_scatter_correction.py rename to chemotools/scatter/_multiplicative_scatter_correction.py index bc6fa21..c61720b 100644 --- a/chemotools/scatter/multiplicative_scatter_correction.py +++ b/chemotools/scatter/_multiplicative_scatter_correction.py @@ -68,13 +68,7 @@ def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) # Check that the length of the reference is the same as the number of features if self.reference is not None: @@ -129,7 +123,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/scatter/robust_normal_variate.py b/chemotools/scatter/_robust_normal_variate.py similarity index 87% rename from chemotools/scatter/robust_normal_variate.py rename to chemotools/scatter/_robust_normal_variate.py index 7778fc5..70eba64 100644 --- a/chemotools/scatter/robust_normal_variate.py +++ b/chemotools/scatter/_robust_normal_variate.py @@ -15,14 +15,6 @@ class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin) The percentile to use for the robust normal variate. The value should be between 0 and 100. The default is 25. - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -58,13 +50,7 @@ def fit(self, X: np.ndarray, y=None) -> "RobustNormalVariate": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -86,7 +72,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/scatter/standard_normal_variate.py b/chemotools/scatter/_standard_normal_variate.py similarity index 79% rename from chemotools/scatter/standard_normal_variate.py rename to chemotools/scatter/_standard_normal_variate.py index 4fac1b7..deddad8 100644 --- a/chemotools/scatter/standard_normal_variate.py +++ b/chemotools/scatter/_standard_normal_variate.py @@ -9,14 +9,6 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi """ A transformer that calculates the standard normal variate of the input data. - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -25,10 +17,11 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi transform(X, y=0, copy=True) Transform the input data by calculating the standard normal variate. """ + def fit(self, X: np.ndarray, y=None) -> "StandardNormalVariate": """ Fit the transformer to the input data. - + Parameters ---------- X : np.ndarray of shape (n_samples, n_features) @@ -43,13 +36,7 @@ def fit(self, X: np.ndarray, y=None) -> "StandardNormalVariate": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -71,7 +58,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -79,7 +66,9 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) # Calculate the standard normal variate for i, x in enumerate(X_): @@ -88,4 +77,4 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: return X_.reshape(-1, 1) if X_.ndim == 1 else X_ def _calculate_standard_normal_variate(self, x) -> np.ndarray: - return (x - x.mean()) / x.std() \ No newline at end of file + return (x - x.mean()) / x.std() diff --git a/chemotools/smooth/__init__.py b/chemotools/smooth/__init__.py index 42167e3..b6befa6 100644 --- a/chemotools/smooth/__init__.py +++ b/chemotools/smooth/__init__.py @@ -1,4 +1,4 @@ -from .mean_filter import MeanFilter -from .median_filter import MedianFilter -from .savitzky_golay_filter import SavitzkyGolayFilter -from .whittaker_smooth import WhittakerSmooth \ No newline at end of file +from ._mean_filter import MeanFilter +from ._median_filter import MedianFilter +from ._savitzky_golay_filter import SavitzkyGolayFilter +from ._whittaker_smooth import WhittakerSmooth \ No newline at end of file diff --git a/chemotools/smooth/mean_filter.py b/chemotools/smooth/_mean_filter.py similarity index 83% rename from chemotools/smooth/mean_filter.py rename to chemotools/smooth/_mean_filter.py index dbe73de..6795d6b 100644 --- a/chemotools/smooth/mean_filter.py +++ b/chemotools/smooth/_mean_filter.py @@ -14,19 +14,11 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): ---------- window_size : int, optional The size of the window to use for the mean filter. Must be odd. Default is 3. - + mode : str, optional The mode to use for the mean filter. Can be "nearest", "constant", "reflect", "wrap", "mirror" or "interp". Default is "nearest". - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -35,7 +27,8 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): transform(X, y=0, copy=True) Transform the input data by calculating the mean filter. """ - def __init__(self, window_size: int = 3, mode='nearest') -> None: + + def __init__(self, window_size: int = 3, mode="nearest") -> None: self.window_size = window_size self.mode = mode @@ -57,13 +50,7 @@ def fit(self, X: np.ndarray, y=None) -> "MeanFilter": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -85,7 +72,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/smooth/median_filter.py b/chemotools/smooth/_median_filter.py similarity index 86% rename from chemotools/smooth/median_filter.py rename to chemotools/smooth/_median_filter.py index 33f0604..3d03eb2 100644 --- a/chemotools/smooth/median_filter.py +++ b/chemotools/smooth/_median_filter.py @@ -19,14 +19,6 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): The mode to use for the median filter. Can be "nearest", "constant", "reflect", "wrap", "mirror" or "interp". Default is "nearest". - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -57,13 +49,7 @@ def fit(self, X: np.ndarray, y=None) -> "MedianFilter": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -85,7 +71,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/smooth/savitzky_golay_filter.py b/chemotools/smooth/_savitzky_golay_filter.py similarity index 88% rename from chemotools/smooth/savitzky_golay_filter.py rename to chemotools/smooth/_savitzky_golay_filter.py index 96f7e21..fe7769f 100644 --- a/chemotools/smooth/savitzky_golay_filter.py +++ b/chemotools/smooth/_savitzky_golay_filter.py @@ -24,14 +24,6 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin) The mode to use for the Savitzky-Golay filter. Can be "nearest", "constant", "reflect", "wrap", "mirror" or "interp". Default is "nearest". - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -40,6 +32,7 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin) transform(X, y=0, copy=True) Transform the input data by calculating the Savitzky-Golay filter. """ + def __init__( self, window_size: int = 3, polynomial_order: int = 1, mode: str = "nearest" ) -> None: @@ -65,13 +58,7 @@ def fit(self, X: np.ndarray, y=None) -> "SavitzkyGolayFilter": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + self._validate_data(X) return self @@ -93,7 +80,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X) diff --git a/chemotools/smooth/whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py similarity index 88% rename from chemotools/smooth/whittaker_smooth.py rename to chemotools/smooth/_whittaker_smooth.py index 1a64ec9..00ba7e3 100644 --- a/chemotools/smooth/whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -24,14 +24,6 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): differences : int, optional The number of differences to use for the Whittaker smooth. Default is 1. - Attributes - ---------- - n_features_in_ : int - The number of features in the input data. - - _is_fitted : bool - Whether the transformer has been fitted to data. - Methods ------- fit(X, y=None) @@ -66,13 +58,7 @@ def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth": The fitted transformer. """ # Check that X is a 2D array and has only finite values - X = check_input(X) - - # Set the number of features - self.n_features_in_ = X.shape[1] - - # Set the fitted attribute to True - self._is_fitted = True + X = self._validate_data(X) return self @@ -94,7 +80,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: The transformed data. """ # Check that the estimator is fitted - check_is_fitted(self, "_is_fitted") + check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X = check_input(X)