fix(example): anomaly detection
LongxingTan authored Jan 22, 2025
1 parent 6852412 commit 7a9d74a
Showing 15 changed files with 124 additions and 187 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -221,12 +221,12 @@ import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tfts import AutoModel, AutoConfig

def build_model():
train_length = 24
train_features = 15
predict_sequence_length = 8
train_length = 24
num_train_features = 15
predict_sequence_length = 8

inputs = Input([train_length, train_features])
def build_model():
inputs = Input([train_length, num_train_features])
config = AutoConfig.for_model("seq2seq")
backbone = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)
outputs = backbone(inputs)
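For context, the README hunk above is truncated at `outputs = backbone(inputs)`. A minimal sketch of how the full snippet might read after this change — the `Dense` head, `tf.keras.Model` wrapping, and `compile` call are assumptions based on the imports shown, not part of this diff:

```python
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tfts import AutoModel, AutoConfig

train_length = 24
num_train_features = 15
predict_sequence_length = 8

def build_model():
    inputs = Input([train_length, num_train_features])
    config = AutoConfig.for_model("seq2seq")
    backbone = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)
    outputs = backbone(inputs)
    # Assumed continuation: project to one target per step and wrap into a Keras model.
    outputs = Dense(1)(outputs)
    return tf.keras.Model(inputs=inputs, outputs=outputs)

model = build_model()
model.compile(optimizer="adam", loss="mse")
```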
10 changes: 5 additions & 5 deletions README_CN.md
@@ -219,12 +219,12 @@ import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tfts import AutoModel, AutoConfig

def build_model():
train_length = 24
train_features = 15
predict_sequence_length = 8
train_length = 24
num_train_features = 15
predict_sequence_length = 8

inputs = Input([train_length, train_features])
def build_model():
inputs = Input([train_length, num_train_features])
config = AutoConfig.for_model("seq2seq")
backbone = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)
outputs = backbone(inputs)
1 change: 0 additions & 1 deletion docs/source/api.rst
@@ -12,4 +12,3 @@ API
layers
models
trainer
tuner
14 changes: 0 additions & 14 deletions docs/source/tutorials.rst
@@ -124,20 +124,6 @@ Multi-variables and multi-steps prediction
model()
Auto-tuned configuration
----------------------------------------

.. code-block:: python
import tensorflow as tf
import tfts
from tfts import AutoModel, AutoConfig, AutoTuner
config = AutoConfig.for_model('rnn')
tuner = AutoTuner('rnn')
tuner.run(config)
Custom head for classification or anomaly task
-------------------------------------------------

3 changes: 2 additions & 1 deletion examples/README.md
@@ -2,9 +2,10 @@

## 🛠️ Basic Usage
Get started with these basic examples:
- [Time Series Prediction](./run_prediction_simple.py): Learn how to predict future values in a time series.
- [Time Series Prediction](./run_prediction_simple.py): Learn how to predict future values.
- [Time Series Classification](./run_classification.py): Explore how to classify time series data.
- [Time Series Anomaly Detection](./run_anomaly.py): Detect anomalies in time series data.
- [AutoML for parameter tuning](./run_tuner.py): Automatically tune model parameters.


## 📓 Notebooks
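For the "Time Series Prediction" entry listed above, here is a minimal sketch of the typical flow, assembled from tfts calls that appear elsewhere in this commit (`get_data`, `AutoConfig`, `AutoModel`, `KerasTrainer`); the exact arguments used in `run_prediction_simple.py` may differ:

```python
from tfts import AutoConfig, AutoModel, KerasTrainer, get_data

train_length, predict_sequence_length = 24, 8

# "sine" is the toy dataset name used in run_tuner.py below.
(x_train, y_train), (x_valid, y_valid) = get_data("sine", train_length, predict_sequence_length, test_size=0.2)

config = AutoConfig.for_model("rnn")
model = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)

trainer = KerasTrainer(model)
trainer.train((x_train, y_train), (x_valid, y_valid), epochs=10)
predictions = trainer.predict(x_valid)
print(predictions.shape)
```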
96 changes: 56 additions & 40 deletions examples/run_anomaly.py
@@ -25,72 +25,88 @@ def parse_args():
return parser.parse_args()


def create_subseq(ts, train_length, pred_length):
sub_seq, next_values = [], []
for i in range(len(ts) - train_length - pred_length):
sub_seq.append(ts[i : i + train_length])
next_values.append(ts[i + train_length : i + train_length + pred_length].T[0])
return sub_seq, next_values
def create_subsequences(time_series, train_length, pred_length):
"""Create subsequences for training and prediction."""
subsequences, next_values = [], []
for i in range(len(time_series) - train_length - pred_length):
subsequences.append(time_series[i : i + train_length])
next_values.append(time_series[i + train_length : i + train_length + pred_length].T[0])
return subsequences, next_values


def build_data():
df = pd.read_csv("http://www.cs.ucr.edu/~eamonn/discords/qtdbsel102.txt", header=None, delimiter="\t")
ecg = df.iloc[:, 2].values
ecg = ecg.reshape(len(ecg), -1)
print("length of ECG data : ", len(ecg))
def load_and_preprocess_data(args):
"""Load ECG data, scale it, and prepare subsequences."""
url = "http://www.cs.ucr.edu/~eamonn/discords/qtdbsel102.txt"
df = pd.read_csv(url, header=None, delimiter="\t")
ecg_data = df.iloc[:, 2].values.reshape(-1, 1)

print(f"Loaded ECG data of length: {len(ecg_data)}")

# Standardize the ECG data
scaler = StandardScaler()
std_ecg = scaler.fit_transform(ecg)
std_ecg = std_ecg[:5000]
scaled_ecg = scaler.fit_transform(ecg_data)

sub_seq, next_values = create_subseq(std_ecg, args.train_length, args.predict_sequence_length)
return np.array(sub_seq), np.array(next_values), std_ecg
# Create subsequences for training and prediction
subsequences, next_values = create_subsequences(scaled_ecg, args.train_length, args.predict_sequence_length)
return np.array(subsequences), np.array(next_values), scaled_ecg


def run_train(args):
x_test, y_test, sig = build_data()
def train_model(args):
"""Train the model using the specified arguments."""
x_train, y_train, _ = load_and_preprocess_data(args)

config = AutoConfig.for_model(args.use_model)
config.train_sequence_length = args.train_length
model = AutoModelForAnomaly.from_config(config, predict_sequence_length=1)
model = AutoModelForAnomaly.from_config(config, predict_sequence_length=args.predict_sequence_length)

trainer = KerasTrainer(model)
trainer.train((x_test, y_test), (x_test, y_test), epochs=args.epochs)
trainer.train((x_train, y_train), (x_train, y_train), epochs=args.epochs)
trainer.save_model(args.output_dir)
return
print(f"Model trained and saved to {args.output_dir}")


def run_inference(args):
x_test, y_test, sig = build_data()
def perform_inference(args):
"""Perform inference using the trained model."""
x_test, y_test, _ = load_and_preprocess_data(args)

print("Starting inference...")
config = AutoConfig.for_model(args.use_model)
config.train_sequence_length = args.train_length

model = AutoModelForAnomaly.from_pretrained(weights_dir=args.output_dir)
det = model.detect(x_test, y_test)
return sig, det

anomaly_scores = model.detect(x_test, y_test)
return _, anomaly_scores


def plot(sig, det):
def plot_results(signal, anomaly_scores):
"""Plot the original signal and detected anomalies."""
fig, axes = plt.subplots(nrows=2, figsize=(15, 10))
axes[0].plot(sig, color="b", label="original data")
x = np.arange(4200, 4400)
y1 = [-3] * len(x)
y2 = [3] * len(x)
axes[0].fill_between(x, y1, y2, facecolor="g", alpha=0.3)

axes[1].plot(det, color="r", label="Mahalanobis Distance")
axes[0].plot(signal, color="b", label="Original Data")
x_range = np.arange(4200, 4400)
axes[0].fill_between(x_range, -3, 3, facecolor="g", alpha=0.3)
axes[0].set_title("ECG Data with Anomalies")
axes[0].legend()

axes[1].plot(anomaly_scores, color="r", label="Mahalanobis Distance")
axes[1].set_ylim(0, 1000)
y1 = [0] * len(x)
y2 = [1000] * len(x)
axes[1].fill_between(x, y1, y2, facecolor="g", alpha=0.3)
# plt.savefig('./anomaly.png')
axes[1].fill_between(x_range, 0, 1000, facecolor="g", alpha=0.3)
axes[1].set_title("Anomaly Detection Scores")
axes[1].legend()

plt.tight_layout()
plt.show()


if __name__ == "__main__":
def main():
"""Main function to orchestrate training, inference, and plotting."""
args = parse_args()
run_train(args)
train_model(args)

# Run inference
signal, anomaly_scores = perform_inference(args)
plot_results(signal, anomaly_scores)

sig, det = run_inference(args)
plot(sig, det)

if __name__ == "__main__":
main()
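The anomaly scores plotted by `plot_results` are Mahalanobis distances of the prediction errors (the `AnomalyHead` tests further down in this diff exercise the same computation). A minimal NumPy sketch of that distance, independent of the tfts API:

```python
import numpy as np

def mahalanobis_distance(x, mean, cov):
    """Distance of vector x from a distribution with the given mean and covariance."""
    diff = x - mean
    return float(np.sqrt(diff @ np.linalg.inv(cov) @ diff))

# Prediction errors from a reference window; mean and covariance estimated from them.
errors = np.array([[0.1, -0.1], [0.1, -0.1], [0.0, 0.1], [0.1, 0.1], [0.1, 0.1]])
mean, cov = errors.mean(axis=0), np.cov(errors, rowvar=False)

# A large distance relative to the reference errors flags a likely anomaly.
print(mahalanobis_distance(np.array([0.9, -0.8]), mean, cov))
```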
3 changes: 1 addition & 2 deletions examples/run_classification.py
@@ -70,9 +70,8 @@ def run_train(args):

y_pred = model(x_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_val, axis=1)

cm = confusion_matrix(y_true_classes, y_pred_classes)
cm = confusion_matrix(y_val, y_pred_classes)
print(cm)
return

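The change above assumes `y_val` already holds integer class ids rather than one-hot vectors, so only the predicted probabilities need `argmax` before building the confusion matrix. A small illustration of that assumption:

```python
import numpy as np
from sklearn.metrics import confusion_matrix

y_val = np.array([0, 2, 1, 2])            # integer class labels, no argmax needed
y_pred = np.array([[0.8, 0.1, 0.1],       # predicted class probabilities
                   [0.2, 0.2, 0.6],
                   [0.1, 0.7, 0.2],
                   [0.3, 0.3, 0.4]])

y_pred_classes = np.argmax(y_pred, axis=1)
print(confusion_matrix(y_val, y_pred_classes))
```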
38 changes: 22 additions & 16 deletions tfts/tuner.py → examples/run_tuner.py
@@ -1,12 +1,8 @@
"""tfts auto tuner"""
"""Demo to tune the model parameters by Autotune"""

import numpy as np

from tfts.models.auto_config import AutoConfig
from tfts.models.auto_model import AutoModel
from tfts.trainer import KerasTrainer

__all__ = ["AutoTuner"]
from tfts import AutoConfig, AutoModel, AutoModelForAnomaly, KerasTrainer, get_data


class AutoTuner(object):
@@ -25,24 +21,19 @@ def objective(self, trial):
num_layers = trial.suggest_int("num_layers", 1, 4)

# Suggest training parameters
learning_rate = trial.suggest_loguniform("learning_rate", 1e-4, 1e-2)
epochs = trial.suggest_int("epochs", 5, 50)
learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2)
epochs = trial.suggest_int("epochs", 10, 50)

# Create model config
config = AutoConfig.for_model(
self.use_model,
hidden_units=hidden_units,
num_layers=num_layers,
)
config = AutoConfig.for_model(self.use_model)
config.rnn_hidden_size = hidden_units
config.num_stacked_layers = num_layers

# Create model and trainer
model = AutoModel.from_config(config, predict_sequence_length=self.predict_sequence_length)
trainer = KerasTrainer(model, optimizer_config={"learning_rate": learning_rate})

# Train the model
trainer.train(self.train_data, self.valid_data, epochs=epochs, verbose=0)

# Evaluate the model (e.g., mean squared error)
x_valid, y_valid = self.valid_data
predictions = trainer.predict(x_valid)
mse = np.mean((y_valid - predictions) ** 2)
@@ -62,3 +53,18 @@ def run(self, n_trials: int = 50, direction: str = "minimize"):
print(f" {key}: {value}")

return study


if __name__ == "__main__":
train_length = 24
predict_sequence_length = 8
(x_train, y_train), (x_valid, y_valid) = get_data("sine", train_length, predict_sequence_length, test_size=0.2)

tuner = AutoTuner(
use_model="rnn",
train_data=(x_train, y_train),
valid_data=(x_valid, y_valid),
predict_sequence_length=predict_sequence_length,
)

study = tuner.run(n_trials=20, direction="minimize")
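The `AutoTuner.objective` above follows the Optuna trial API (`suggest_int`, `suggest_float`), with the deprecated `suggest_loguniform` swapped for `suggest_float`. A minimal self-contained sketch of that pattern with a stand-in objective, assuming Optuna is installed; a real objective would train and evaluate a model as the tuner does:

```python
import optuna

def objective(trial):
    # Same parameter-suggestion pattern as AutoTuner.objective above.
    hidden_units = trial.suggest_int("hidden_units", 32, 256)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    # Stand-in metric; replace with training plus validation error (e.g. MSE).
    return (hidden_units - 128) ** 2 * 1e-6 + learning_rate

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)
print(study.best_params)
```

Note that `suggest_float` samples on a linear scale unless `log=True` is passed, whereas `suggest_loguniform` sampled on a log scale.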
18 changes: 4 additions & 14 deletions tests/test_tasks/test_auto_task.py
@@ -12,10 +12,10 @@ def setUp(self):
self.layer = AnomalyHead(train_sequence_length=self.train_sequence_length)

def test_call(self):
y_pred = tf.constant([[0.5, 0.2], [0.7, 0.3], [0.1, 0.1], [0.9, 0.4], [0.2, 0.3]])
y_test = tf.constant([[0.6, 0.1], [0.8, 0.2], [0.1, 0.0], [1.0, 0.5], [0.3, 0.4]])
y_pred = np.array([[0.5, 0.2], [0.7, 0.3], [0.1, 0.1], [0.9, 0.4], [0.2, 0.3]])
y_test = np.array([[0.6, 0.1], [0.8, 0.2], [0.1, 0.0], [1.0, 0.5], [0.3, 0.4]])

m_dist = self.layer.call(y_pred, y_test)
m_dist = self.layer(y_pred, y_test)

# self.assertEqual(len(m_dist), self.train_sequence_length)

@@ -24,26 +24,16 @@ def test_call(self):
self.assertGreaterEqual(dist, 0)

def test_mahala_distance(self):
# Mock data for Mahalanobis distance calculation
x = np.array([0.5, 0.2])
mean = np.array([0.6, 0.1])
cov = np.array([[0.01, 0.001], [0.001, 0.02]])

# Calculate Mahalanobis distance using the layer's method
m_dist = self.layer._mahala_distance(x, mean, cov)
m_dist = self.layer.mahala_distantce(x, mean, cov)

# The Mahalanobis distance should be a scalar value (float)
self.assertIsInstance(m_dist, np.float64)

def test_empty_input(self):
y_pred = tf.constant([[]])
y_test = tf.constant([[]])

# Call the layer's call method with empty input, output should be an empty list
m_dist = self.layer.call(y_pred, y_test)
print(m_dist)
# self.assertEqual(len(m_dist), 0)


class DeepARLayerTest(unittest.TestCase):
def test_gaussian_layer(self):
19 changes: 0 additions & 19 deletions tests/test_tuner.py

This file was deleted.

2 changes: 0 additions & 2 deletions tfts/__init__.py
@@ -12,7 +12,6 @@
)
from tfts.trainer import KerasTrainer, Trainer
from tfts.training_args import TrainingArguments
from tfts.tuner import AutoTuner

__all__ = [
"AutoModel",
@@ -22,7 +21,6 @@
"AutoModelForAnomaly",
"AutoModelForUncertainty",
"AutoConfig",
"AutoTuner",
"Trainer",
"KerasTrainer",
"TrainingArguments",
10 changes: 8 additions & 2 deletions tfts/models/auto_model.py
@@ -104,10 +104,11 @@ class AutoModelForPrediction(AutoModel):
def __call__(
self,
x: Union[tf.data.Dataset, Tuple[np.ndarray], Tuple[pd.DataFrame], List[np.ndarray], List[pd.DataFrame]],
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
):

model_output = super().__call__(x, return_dict=return_dict)
model_output = self.model(x, return_dict=return_dict)

if self.config.skip_connect_circle:
x_mean = x[:, -self.predict_sequence_length :, 0:1]
@@ -128,10 +129,15 @@ def __init__(self, model, config):
def __call__(
self,
x: Union[tf.data.Dataset, Tuple[np.ndarray], Tuple[pd.DataFrame], List[np.ndarray], List[pd.DataFrame]],
output_hidden_states: Optional[bool] = True,
return_dict: Optional[bool] = None,
**kwargs,
):
model_output = self.model(x, output_hidden_states=True)
if hasattr(self.model, "call"):
model_output = self.model(x)
else:
model_output = self.model(x, output_hidden_states=output_hidden_states)

logits = self.head(model_output)
return logits

1 change: 1 addition & 0 deletions tfts/models/base.py
@@ -39,6 +39,7 @@ def from_pretrained(cls, weights_dir: str, predict_sequence_length: int = 1):
return model

def build_model(self, inputs: tf.keras.layers.Input) -> tf.keras.Model:
# after building, the wrapped model only accepts the inputs argument
outputs = self.model(inputs)
# to handle the Keras symbolic tensors for tf2.3.1
self.model = tf.keras.Model([inputs], [outputs])