Replace ALE loss with ACE (adaptive cross-entropy), update default hyperparameters, and use max iterations as checkpoint_interval when it is zero

inzapp · inzapp · commit 829b9b052612 · 2024-06-02T18:55:20.000+09:00
diff --git a/ace.py b/ace.py
@@ -1,9 +1,9 @@
 """
 Authors : inzapp
 
-Github url : https://github.com/inzapp/absolute-logarithmic-error
+Github url : https://github.com/inzapp/adaptive-crossentropy
 
-Copyright 2022 inzapp Authors. All Rights Reserved.
+Copyright 2023 inzapp Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License"),
 you may not use this file except in compliance with the License.
@@ -20,25 +20,23 @@
 import tensorflow as tf
 
 
-class AbsoluteLogarithmicError(tf.keras.losses.Loss):
-    """Computes the cross-entropy log scale loss between true labels and predicted labels.
+class AdaptiveCrossentropy(tf.keras.losses.Loss):
+    """Computes the adaptive cross-entropy loss between true labels and predicted labels.
 
-    This loss function can be used regardless of classification problem or regression problem.
-
-    See: https://github.com/inzapp/absolute-logarithmic-error
+    See: https://github.com/inzapp/adaptive-crossentropy
 
     Standalone usage:
         >>> y_true = [[0, 1], [0, 0]]
         >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
-        >>> ale = AbsoluteLogarithmicError()
-        >>> loss = ale(y_true, y_pred)
+        >>> ace = AdaptiveCrossentropy()
+        >>> loss = ace(y_true, y_pred)
         >>> loss.numpy()
-        array([[0.9162905, 0.9162905], [0.5108255, 0.9162905]], dtype=float32)
+        array([[0.9162906, 0.9162905], [0.5108254, 0.9162906]], dtype=float32)
 
     Usage:
-        model.compile(optimizer='sgd', loss=AbsoluteLogarithmicError())
+        model.compile(optimizer='sgd', loss=AdaptiveCrossentropy())
     """
-    def __init__(self, alpha=0.0, gamma=0.0, label_smoothing=0.0, reduce='none', name='AbsoluteLogarithmicError'):
+    def __init__(self, alpha=0.0, gamma=0.0, label_smoothing=0.0, reduce='none', name='AdaptiveCrossentropy'):
         """
         Args:
             alpha: Weight of the loss where not positive value positioned in y_true tensor.
@@ -79,14 +77,14 @@ def call(self, y_true, y_pred):
         eps = tf.cast(self.eps, y_pred.dtype)
         y_true_clip = tf.clip_by_value(y_true, self.label_smoothing, 1.0 - self.label_smoothing)
         y_pred_clip = tf.clip_by_value(y_pred, eps, 1.0 - eps)
-        abs_error = tf.abs(y_true_clip - y_pred_clip)
-        loss = -tf.math.log((1.0 + eps) - abs_error)
+        loss = -((y_true * tf.math.log(y_pred + eps)) + ((1.0 - y_true) * tf.math.log(1.0 - y_pred + eps)))
         if self.alpha > 0.0:
             alpha = tf.ones_like(y_true) * self.alpha
             alpha = tf.where(y_true != 1.0, alpha, 1.0 - alpha)
             loss *= alpha
         if self.gamma >= 1.0:
-            loss *= tf.pow(abs_error, self.gamma)
+            adaptive_weight = tf.pow(tf.abs(y_true_clip - y_pred_clip), self.gamma)
+            loss *= adaptive_weight
         if self.reduce == 'mean':
             loss = tf.reduce_mean(loss)
         elif self.reduce == 'sum':
diff --git a/sigmoid_classifier.py b/sigmoid_classifier.py
@@ -34,7 +34,7 @@
 from live_plot import LivePlot
 from generator import DataGenerator
 from lr_scheduler import LRScheduler
-from ale import AbsoluteLogarithmicError
+from ace import AdaptiveCrossentropy
 from ckpt_manager import CheckpointManager
 
 
@@ -63,7 +63,7 @@ def __init__(self,
                  cam_activation_layer_name='cam_activation',
                  last_conv_layer_name='squeeze_conv'):
         super().__init__()
-        assert checkpoint_interval >= 1000
+        assert checkpoint_interval == 0 or checkpoint_interval >= 1000
         self.input_shape = input_shape
         self.lr = lr
         self.lrf = lrf
@@ -82,6 +82,8 @@ def __init__(self,
         self.pretrained_iteration_count = 0
         warnings.filterwarnings(action='ignore')
         self.set_model_name(model_name)
+        if self.checkpoint_interval == 0:
+            self.checkpoint_interval = self.iterations
 
         train_image_path = self.unify_path(train_image_path)
         validation_image_path = self.unify_path(validation_image_path)
@@ -247,7 +249,7 @@ def train(self):
         print(f'\ntrain on {len(self.train_image_paths)} samples')
         print(f'validate on {len(self.validation_image_paths)} samples\n')
         optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr, beta_1=self.momentum)
-        loss_function = AbsoluteLogarithmicError(alpha=self.alpha, gamma=self.gamma, label_smoothing=self.label_smoothing)
+        loss_function = AdaptiveCrossentropy(alpha=self.alpha, gamma=self.gamma, label_smoothing=self.label_smoothing)
         lr_scheduler = LRScheduler(lr=self.lr, lrf=self.lrf, iterations=self.iterations, warm_up=self.warm_up, policy=self.lr_policy)
         self.init_checkpoint_dir()
         iteration_count = self.pretrained_iteration_count
diff --git a/train.py b/train.py
@@ -30,18 +30,18 @@
         input_shape=(64, 64, 1),
         lr=0.001,
         lrf=0.05,
-        alpha=0.5,
+        alpha=0.0,
         gamma=2.0,
-        warm_up=0.5,
+        warm_up=0.1,
         momentum=0.9,
-        batch_size=32,
+        batch_size=64,
         iterations=1000000,
         label_smoothing=0.1,
         aug_brightness=0.3,
-        aug_contrast=0.4,
-        aug_rotate=20,
+        aug_contrast=0.3,
+        aug_rotate=15,
         aug_h_flip=False,
-        checkpoint_interval=20000,
+        checkpoint_interval=10000,
         show_class_activation_map=False)
 
     parser = argparse.ArgumentParser()