This repository was archived by the owner on Feb 7, 2025. It is now read-only.

Commit 4bc610a

Merge pull request #479 from StijnvWijn/475-add-different-downsampling-methods-to-PatchGAN-discriminator

475 add different downsampling methods to patch gan discriminator

2 parents (ef6b7e6 + 2045a00), commit 4bc610a

File tree: 3 files changed (+138, -21 lines)

generative/networks/nets/patchgan_discriminator.py

Lines changed: 57 additions & 18 deletions
```diff
@@ -17,7 +17,7 @@
 import torch
 import torch.nn as nn
 from monai.networks.blocks import Convolution
-from monai.networks.layers import Act
+from monai.networks.layers import Act, get_pool_layer
 
 
 class MultiScalePatchDiscriminator(nn.Sequential):
@@ -38,6 +38,8 @@ class MultiScalePatchDiscriminator(nn.Sequential):
         spatial_dims: number of spatial dimensions (1D, 2D etc.)
         num_channels: number of filters in the first convolutional layer (double of the value is taken from then on)
         in_channels: number of input channels
+        pooling_method: pooling method to be applied before each discriminator after the first.
+            If None, the number of layers is multiplied by the number of discriminators.
         out_channels: number of output channels in each discriminator
         kernel_size: kernel size of the convolution layers
         activation: activation layer type
@@ -52,10 +54,11 @@ class MultiScalePatchDiscriminator(nn.Sequential):
     def __init__(
         self,
         num_d: int,
-        num_layers_d: int,
+        num_layers_d: int | list[int],
         spatial_dims: int,
         num_channels: int,
         in_channels: int,
+        pooling_method: str = None,
         out_channels: int = 1,
         kernel_size: int = 4,
         activation: str | tuple = (Act.LEAKYRELU, {"negative_slope": 0.2}),
@@ -67,31 +70,67 @@ def __init__(
     ) -> None:
         super().__init__()
         self.num_d = num_d
+        if isinstance(num_layers_d, int) and pooling_method is None:
+            # if pooling_method is None, calculate the number of layers for each discriminator by multiplying by the number of discriminators
+            num_layers_d = [num_layers_d * i for i in range(1, num_d + 1)]
+        elif isinstance(num_layers_d, int) and pooling_method is not None:
+            # if pooling_method is not None, the number of layers is the same for all discriminators
+            num_layers_d = [num_layers_d] * num_d
         self.num_layers_d = num_layers_d
-        self.num_channels = num_channels
+        assert (
+            len(self.num_layers_d) == self.num_d
+        ), f"MultiScalePatchDiscriminator: num_d {num_d} must match the number of num_layers_d. {num_layers_d}"
+
         self.padding = tuple([int((kernel_size - 1) / 2)] * spatial_dims)
+
+        if pooling_method is None:
+            pool = None
+        else:
+            pool = get_pool_layer(
+                (pooling_method, {"kernel_size": kernel_size, "stride": 2, 'padding': self.padding}), spatial_dims=spatial_dims
+            )
+        self.num_channels = num_channels
         for i_ in range(self.num_d):
-            num_layers_d_i = self.num_layers_d * (i_ + 1)
+            num_layers_d_i = self.num_layers_d[i_]
             output_size = float(minimum_size_im) / (2**num_layers_d_i)
             if output_size < 1:
                 raise AssertionError(
                     "Your image size is too small to take in up to %d discriminators with num_layers = %d."
                     "Please reduce num_layers, reduce num_D or enter bigger images." % (i_, num_layers_d_i)
                 )
-            subnet_d = PatchDiscriminator(
-                spatial_dims=spatial_dims,
-                num_channels=self.num_channels,
-                in_channels=in_channels,
-                out_channels=out_channels,
-                num_layers_d=num_layers_d_i,
-                kernel_size=kernel_size,
-                activation=activation,
-                norm=norm,
-                bias=bias,
-                padding=self.padding,
-                dropout=dropout,
-                last_conv_kernel_size=last_conv_kernel_size,
-            )
+            if i_ == 0 or pool is None:
+                subnet_d = PatchDiscriminator(
+                    spatial_dims=spatial_dims,
+                    num_channels=self.num_channels,
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    num_layers_d=num_layers_d_i,
+                    kernel_size=kernel_size,
+                    activation=activation,
+                    norm=norm,
+                    bias=bias,
+                    padding=self.padding,
+                    dropout=dropout,
+                    last_conv_kernel_size=last_conv_kernel_size,
+                )
+            else:
+                subnet_d = nn.Sequential(
+                    *[pool] * i_,
+                    PatchDiscriminator(
+                        spatial_dims=spatial_dims,
+                        num_channels=self.num_channels,
+                        in_channels=in_channels,
+                        out_channels=out_channels,
+                        num_layers_d=num_layers_d_i,
+                        kernel_size=kernel_size,
+                        activation=activation,
+                        norm=norm,
+                        bias=bias,
+                        padding=self.padding,
+                        dropout=dropout,
+                        last_conv_kernel_size=last_conv_kernel_size,
+                    ),
+                )
 
             self.add_module("discriminator_%d" % i_, subnet_d)
 
```

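For context, here is a minimal usage sketch of the changed interface (not part of this commit). It assumes the class is imported directly from the module above, that the remaining constructor arguments keep their defaults, and that the forward pass returns per-discriminator predictions together with their intermediate features, as the test cases below exercise.

```python
import torch

from generative.networks.nets.patchgan_discriminator import MultiScalePatchDiscriminator

# Without pooling, discriminator i is built with num_layers_d * (i + 1) strided
# convolution layers, so later discriminators downsample more aggressively.
multi_scale_strided = MultiScalePatchDiscriminator(
    num_d=2,
    num_layers_d=3,
    spatial_dims=2,
    num_channels=8,
    in_channels=1,
    minimum_size_im=256,
)

# With a pooling method, every discriminator keeps num_layers_d layers, but
# discriminator i first sees the input downsampled by i pooling operations.
multi_scale_pooled = MultiScalePatchDiscriminator(
    num_d=2,
    num_layers_d=3,
    spatial_dims=2,
    num_channels=8,
    in_channels=1,
    pooling_method="avg",
    minimum_size_im=256,
)

x = torch.rand(1, 1, 256, 256)
outputs, features = multi_scale_pooled(x)  # assumed return: predictions and intermediate features
print([o.shape for o in outputs])
```
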
tests/test_patch_gan.py

Lines changed: 80 additions & 2 deletions
```diff
@@ -58,6 +58,65 @@
     [(1, 1, 32, 64, 32), (1, 1, 4, 8, 4)],
     [4, 7],
 ]
+TEST_3D_POOL = [
+    {
+        "num_d": 2,
+        "num_layers_d": 3,
+        "spatial_dims": 3,
+        "num_channels": 8,
+        "in_channels": 3,
+        "out_channels": 1,
+        "kernel_size": 3,
+        "pooling_method": "max",
+        "activation": "LEAKYRELU",
+        "norm": "instance",
+        "bias": False,
+        "dropout": 0.1,
+        "minimum_size_im": 256,
+    },
+    torch.rand([1, 3, 256, 512, 256]),
+    [(1, 1, 32, 64, 32), (1, 1, 16, 32, 16)],
+    [4, 4],
+]
+TEST_2D_POOL = [
+    {
+        "num_d": 4,
+        "num_layers_d": 3,
+        "spatial_dims": 2,
+        "num_channels": 8,
+        "in_channels": 3,
+        "out_channels": 1,
+        "kernel_size": 3,
+        "pooling_method": "avg",
+        "activation": "LEAKYRELU",
+        "norm": "instance",
+        "bias": False,
+        "dropout": 0.1,
+        "minimum_size_im": 256,
+    },
+    torch.rand([1, 3, 256, 512]),
+    [(1, 1, 32, 64), (1, 1, 16, 32), (1, 1, 8, 16), (1, 1, 4, 8)],
+    [4, 4, 4, 4],
+]
+TEST_LAYER_LIST = [
+    {
+        "num_d": 3,
+        "num_layers_d": [3,4,5],
+        "spatial_dims": 2,
+        "num_channels": 8,
+        "in_channels": 3,
+        "out_channels": 1,
+        "kernel_size": 3,
+        "activation": "LEAKYRELU",
+        "norm": "instance",
+        "bias": False,
+        "dropout": 0.1,
+        "minimum_size_im": 256,
+    },
+    torch.rand([1, 3, 256, 512]),
+    [(1, 1, 32, 64), (1, 1, 16, 32), (1, 1, 8, 16)],
+    [4, 5, 6],
+]
 TEST_TOO_SMALL_SIZE = [
     {
         "num_d": 2,
@@ -74,9 +133,24 @@
         "minimum_size_im": 256,
     }
 ]
+TEST_MISMATCHED_NUM_LAYERS = [
+    {
+        "num_d": 5,
+        "num_layers_d": [3,4,5],
+        "spatial_dims": 2,
+        "num_channels": 8,
+        "in_channels": 3,
+        "out_channels": 1,
+        "kernel_size": 3,
+        "activation": "LEAKYRELU",
+        "norm": "instance",
+        "bias": False,
+        "dropout": 0.1,
+        "minimum_size_im": 256,
+    }
+]
 
-CASES = [TEST_2D, TEST_3D]
-
+CASES = [TEST_2D, TEST_3D, TEST_3D_POOL, TEST_2D_POOL, TEST_LAYER_LIST]
 
 class TestPatchGAN(unittest.TestCase):
     @parameterized.expand(CASES)
@@ -93,6 +167,10 @@ def test_too_small_shape(self):
         with self.assertRaises(AssertionError):
             MultiScalePatchDiscriminator(**TEST_TOO_SMALL_SIZE[0])
 
+    def test_mismatched_num_layers(self):
+        with self.assertRaises(AssertionError):
+            MultiScalePatchDiscriminator(**TEST_MISMATCHED_NUM_LAYERS[0])
+
     def test_script(self):
         net = MultiScalePatchDiscriminator(
             num_d=2,
```

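As a quick sanity check on the expected shapes in the new pooling cases (a sketch, not part of the test suite): assuming each stride-2 convolution layer and each pooling step halves the spatial size, the TEST_3D_POOL expectations follow directly.

```python
# Back-of-the-envelope check for TEST_3D_POOL: discriminator i is preceded by
# i pooling steps and then applies num_layers_d stride-2 convolution layers.
input_shape = (256, 512, 256)
num_layers_d = 3

for i in range(2):  # num_d = 2
    downsampling_steps = num_layers_d + i
    print(tuple(s // 2**downsampling_steps for s in input_shape))
# (32, 64, 32)
# (16, 32, 16)
```
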
tutorials/generative/2d_spade_gan/2d_spade_vae.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -357,4 +357,4 @@ def feature_loss(input_features_disc_fake, input_features_disc_real, lambda_feat
 # + [markdown] pycharm={"name": "#%%"}
 # **Conclusion**: from early on, the network shows the capability to discern between the different semantic layers. To achieve good image quality, more images and training time are needed (to avoid the overfitting seen in some loss plots of the previous example), as well as thorough optimisation, such as establishing an adversarial schedule that makes sure the discriminator and the generator are trained only when their performance does not exceed a certain limit.
 #
-# -
+# -
```
