Fix NaN loss during training (use numerically stable tf.math.softplus in mish)

hunglc007 · hunglc007 · commit a61f81f9118d · 2020-07-10T17:46:56.000+07:00
diff --git a/core/common.py b/core/common.py
@@ -36,32 +36,11 @@ def convolutional(input_layer, filters_shape, downsample=False, activate=True, b
             conv = tf.nn.leaky_relu(conv, alpha=0.1)
         elif activate_type == "mish":
             conv = mish(conv)
-            # conv = softplus(conv)
-            # conv = conv * tf.math.tanh(tf.math.softplus(conv))
-            # conv = conv * tf.tanh(softplus(conv))
-            # conv = tf.nn.leaky_relu(conv, alpha=0.1)
-            # conv = tfa.activations.mish(conv)
-            # conv = conv * tf.nn.tanh(tf.keras.activations.relu(tf.nn.softplus(conv), max_value=20))
-            # conv = tf.nn.softplus(conv)
-            # conv = tf.keras.activations.relu(tf.nn.softplus(conv), max_value=20)
-
     return conv
-def softplus(x, threshold = 20.):
-    def f1():
-        return x
-    def f2():
-        return tf.exp(x)
-    def f3():
-        return tf.math.log(1 + tf.exp(x))
-    # mask = tf.greater(x, threshold)
-    # x = tf.exp(x[mask])
-    # return tf.exp(x)
-    return tf.case([(tf.greater(x, tf.constant(threshold)), lambda:f1()), (tf.less(x, tf.constant(-threshold)), lambda:f2())], default=lambda:f3())
-    # return tf.case([(tf.greater(x, threshold), lambda:f1())])
+
 def mish(x):
-    return tf.keras.layers.Lambda(lambda x: x*tf.tanh(tf.math.log(1+tf.exp(x))))(x)
-    # return tf.keras.layers.Lambda(lambda x: softplus(x))(x)
-    # return tf.keras.layers.Lambda(lambda x: x * tf.tanh(softplus(x)))(x)
+    return x * tf.math.tanh(tf.math.softplus(x))
+    # return tf.keras.layers.Lambda(lambda x: x*tf.tanh(tf.math.log(1+tf.exp(x))))(x)
 
 def residual_block(input_layer, input_channel, filter_num1, filter_num2, activate_type='leaky'):
     short_cut = input_layer
diff --git a/train.py b/train.py
@@ -11,7 +11,7 @@
 from core.utils import freeze_all, unfreeze_all
 
 flags.DEFINE_string('model', 'yolov4', 'yolov4, yolov3')
-flags.DEFINE_string('weights', './data/yolov4.weights', 'pretrained weights')
+flags.DEFINE_string('weights', './scripts/yolov4.weights', 'pretrained weights')
 flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
 
 def main(_argv):
@@ -60,6 +60,7 @@ def main(_argv):
             bbox_tensors.append(bbox_tensor)
 
     model = tf.keras.Model(input_layer, bbox_tensors)
+    model.summary()
 
     if FLAGS.weights == None:
         print("Training from scratch")
@@ -75,6 +76,8 @@ def main(_argv):
     if os.path.exists(logdir): shutil.rmtree(logdir)
     writer = tf.summary.create_file_writer(logdir)
 
+    # define training step function
+    # @tf.function
     def train_step(image_data, target):
         with tf.GradientTape() as tape:
             pred_result = model(image_data, training=True)
@@ -92,8 +95,8 @@ def train_step(image_data, target):
 
             gradients = tape.gradient(total_loss, model.trainable_variables)
             optimizer.apply_gradients(zip(gradients, model.trainable_variables))
-            tf.print("=> STEP %4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
-                     "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, optimizer.lr.numpy(),
+            tf.print("=> STEP %4d/%4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
+                     "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, total_steps, optimizer.lr.numpy(),
                                                                giou_loss, conf_loss,
                                                                prob_loss, total_loss))
             # update learning rate