
Commit 9d17ea6

Merge branch 'rom1504-custom_dataset'
2 parents: 049bd91 + 13e6230

File tree (3 files changed, +93, -0 lines):

- README.md
- configs/custom_vqgan.yaml
- taming/data/custom.py

README.md (+12 lines)
@@ -15,6 +15,7 @@

### News
- Thanks to [rom1504](https://github.com/rom1504) it is now easy to [train a VQGAN on your own datasets](#training-on-custom-data).
- Included a bugfix for the quantizer. For backward compatibility it is
  disabled by default (which corresponds to always training with `beta=1.0`).
  Use `legacy=False` in the quantizer config to enable it.

@@ -180,6 +181,17 @@ included in the repository, run

streamlit run scripts/sample_conditional.py -- -r logs/2020-11-20T21-45-44_ade20k_transformer/ --ignore_base_data data="{target: main.DataModuleFromConfig, params: {batch_size: 1, validation: {target: taming.data.ade20k.Examples}}}"
```

## Training on custom data

Training on your own dataset can be beneficial to get better tokens and hence better images for your domain.
These are the steps to follow to make this work:
1. Install the repo with `conda env create -f environment.yaml`, `conda activate taming` and `pip install -e .`.
2. Put your .jpg files in a folder `your_folder`.
3. Create two text files, `xx_train.txt` and `xx_test.txt`, that point to the files in your training and test set respectively (for example `find $(pwd)/your_folder -name "*.jpg" > train.txt`).
4. Adapt `configs/custom_vqgan.yaml` to point to these two files.
5. Run `python main.py --base configs/custom_vqgan.yaml -t True --gpus 0,1` to train on two GPUs. Use `--gpus 0,` (with a trailing comma) to train on a single GPU.

## Data Preparation

### ImageNet
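A short Python script can serve the same purpose as the `find` one-liner in step 3 of the new README section, writing absolute image paths and splitting them into train and test lists in one go. This is only a convenience sketch; the folder name, the 90/10 split, and the output file names are placeholder choices, not part of this commit.

```python
# Sketch: build train.txt / test.txt from a folder of .jpg files.
# "your_folder", the 90/10 split and the output names are arbitrary choices.
import os
import random

folder = "your_folder"
paths = sorted(
    os.path.join(os.path.abspath(folder), name)
    for name in os.listdir(folder)
    if name.lower().endswith(".jpg")
)

random.seed(0)                      # reproducible split
random.shuffle(paths)
n_test = max(1, len(paths) // 10)   # hold out roughly 10% for testing

with open("test.txt", "w") as f:
    f.write("\n".join(paths[:n_test]) + "\n")
with open("train.txt", "w") as f:
    f.write("\n".join(paths[n_test:]) + "\n")
```

The resulting `train.txt` and `test.txt` are what `configs/custom_vqgan.yaml` (the next file in this commit) expects under `training_images_list_file` and `test_images_list_file`.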

configs/custom_vqgan.yaml (+43 lines)
@@ -0,0 +1,43 @@

model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    ddconfig:
      double_z: False
      z_channels: 256
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [ 1,1,2,2,4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0

    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: False
        disc_in_channels: 3
        disc_start: 10000
        disc_weight: 0.8
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 5
    num_workers: 8
    train:
      target: taming.data.custom.CustomTrain
      params:
        training_images_list_file: some/training.txt
        size: 256
    validation:
      target: taming.data.custom.CustomTest
      params:
        test_images_list_file: some/test.txt
        size: 256
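Every `target`/`params` pair in this config names a class by its import path plus the keyword arguments it is built with; `main.DataModuleFromConfig` resolves the `train` and `validation` entries this way at startup. The helper below is a stand-alone sketch of that resolution pattern, not the repository's actual function (whose name and signature may differ):

```python
# Minimal sketch of the target/params instantiation pattern used in the config.
# build_from_config is illustrative only; the real resolution happens in main.py.
import importlib

def build_from_config(config):
    """Import config["target"] and construct it with config["params"]."""
    module_name, class_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**config.get("params", {}))

# The `train` entry of the data section expands to roughly:
#   taming.data.custom.CustomTrain(training_images_list_file="some/training.txt", size=256)
train_cfg = {
    "target": "taming.data.custom.CustomTrain",
    "params": {"training_images_list_file": "some/training.txt", "size": 256},
}
train_dataset = build_from_config(train_cfg)
```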

taming/data/custom.py (+38 lines)
@@ -0,0 +1,38 @@

import os
import numpy as np
import albumentations
from torch.utils.data import Dataset

from taming.data.base import ImagePaths, NumpyPaths, ConcatDatasetWithIndex


class CustomBase(Dataset):
    # Thin Dataset wrapper; subclasses fill in self.data with an ImagePaths instance.
    def __init__(self, *args, **kwargs):
        super().__init__()
        self.data = None

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        example = self.data[i]
        return example


class CustomTrain(CustomBase):
    # Training split: reads one image path per line from the given list file.
    def __init__(self, size, training_images_list_file):
        super().__init__()
        with open(training_images_list_file, "r") as f:
            paths = f.read().splitlines()
        self.data = ImagePaths(paths=paths, size=size, random_crop=False)


class CustomTest(CustomBase):
    # Test split: same as CustomTrain but driven by the test list file.
    def __init__(self, size, test_images_list_file):
        super().__init__()
        with open(test_images_list_file, "r") as f:
            paths = f.read().splitlines()
        self.data = ImagePaths(paths=paths, size=size, random_crop=False)
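For a quick sanity check before launching a full run, the new dataset classes can be used directly with the list file from the README steps. The snippet assumes `train.txt` exists and that `ImagePaths` (from `taming.data.base`) yields a dict per sample with an `"image"` array, as the other datasets in `taming.data` do; it is a usage sketch, not part of the commit:

```python
# Sketch: inspect a few samples from a custom file list
# (train.txt is assumed to exist, one absolute .jpg path per line).
from torch.utils.data import DataLoader
from taming.data.custom import CustomTrain

dataset = CustomTrain(size=256, training_images_list_file="train.txt")
print(len(dataset), "training images")

example = dataset[0]
# ImagePaths is expected to return a dict per sample; the "image" entry
# should be a (256, 256, 3) float array after resizing and cropping.
print(sorted(example.keys()))
print(example["image"].shape)

# During training this wiring is done by main.DataModuleFromConfig,
# using the batch_size/num_workers values from the config.
loader = DataLoader(dataset, batch_size=5, num_workers=0, shuffle=True)
batch = next(iter(loader))
print(batch["image"].shape)   # expected: (5, 256, 256, 3)
```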
