import re

import jax.numpy as jnp
from flax.traverse_util import flatten_dict, unflatten_dict

import torch

from modeling_flax_vqgan import VQModel
from configuration_vqgan import VQGANConfig


regex = r"\w+[.]\d+"


def rename_key(key):
    pats = re.findall(regex, key)
    for pat in pats:
        key = key.replace(pat, "_".join(pat.split(".")))
    return key
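
# Illustration (hypothetical key, in the style of taming-transformers VQGAN checkpoints):
#   rename_key("decoder.up.0.block.1.norm1.weight") -> "decoder.up_0.block_1.norm1.weight"
# i.e. every "<name>.<digit>" segment is rewritten with an underscore so the keys line up
# with the Flax parameter names.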


# Adapted from https://github.com/huggingface/transformers/blob/ff5cdc086be1e0c3e2bbad8e3469b34cffb55a85/src/transformers/modeling_flax_pytorch_utils.py#L61
def convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model):
    # convert PyTorch tensors to numpy
    pt_state_dict = {k: v.numpy() for k, v in pt_state_dict.items()}

    random_flax_state_dict = flatten_dict(flax_model.params)
    flax_state_dict = {}

    remove_base_model_prefix = (flax_model.base_model_prefix not in flax_model.params) and (
        flax_model.base_model_prefix in set([k.split(".")[0] for k in pt_state_dict.keys()])
    )
    add_base_model_prefix = (flax_model.base_model_prefix in flax_model.params) and (
        flax_model.base_model_prefix not in set([k.split(".")[0] for k in pt_state_dict.keys()])
    )

    # Need to change some parameter names to match Flax names so that we don't have to fork any layer
    for pt_key, pt_tensor in pt_state_dict.items():
        pt_tuple_key = tuple(pt_key.split("."))

        has_base_model_prefix = pt_tuple_key[0] == flax_model.base_model_prefix
        require_base_model_prefix = (flax_model.base_model_prefix,) + pt_tuple_key in random_flax_state_dict

        if remove_base_model_prefix and has_base_model_prefix:
            pt_tuple_key = pt_tuple_key[1:]
        elif add_base_model_prefix and require_base_model_prefix:
            pt_tuple_key = (flax_model.base_model_prefix,) + pt_tuple_key

        # Correctly rename weight parameters
        if (
            "norm" in pt_key
            and (pt_tuple_key[-1] == "bias")
            and (pt_tuple_key[:-1] + ("bias",) in random_flax_state_dict)
        ):
            pt_tensor = pt_tensor[None, None, None, :]
        elif (
            "norm" in pt_key
            and (pt_tuple_key[-1] == "bias")
            and (pt_tuple_key[:-1] + ("scale",) in random_flax_state_dict)
        ):
            pt_tuple_key = pt_tuple_key[:-1] + ("scale",)
            pt_tensor = pt_tensor[None, None, None, :]
        elif pt_tuple_key[-1] in ["weight", "gamma"] and pt_tuple_key[:-1] + ("scale",) in random_flax_state_dict:
            pt_tuple_key = pt_tuple_key[:-1] + ("scale",)
            pt_tensor = pt_tensor[None, None, None, :]
        if pt_tuple_key[-1] == "weight" and pt_tuple_key[:-1] + ("embedding",) in random_flax_state_dict:
            pt_tuple_key = pt_tuple_key[:-1] + ("embedding",)
        elif pt_tuple_key[-1] == "weight" and pt_tensor.ndim == 4 and pt_tuple_key not in random_flax_state_dict:
            # conv layer: PyTorch (out_ch, in_ch, kh, kw) -> Flax (kh, kw, in_ch, out_ch)
            pt_tuple_key = pt_tuple_key[:-1] + ("kernel",)
            pt_tensor = pt_tensor.transpose(2, 3, 1, 0)
        elif pt_tuple_key[-1] == "weight" and pt_tuple_key not in random_flax_state_dict:
            # linear layer: PyTorch (out, in) -> Flax (in, out)
            pt_tuple_key = pt_tuple_key[:-1] + ("kernel",)
            pt_tensor = pt_tensor.T
        elif pt_tuple_key[-1] == "gamma":
            pt_tuple_key = pt_tuple_key[:-1] + ("weight",)
        elif pt_tuple_key[-1] == "beta":
            pt_tuple_key = pt_tuple_key[:-1] + ("bias",)

        if pt_tuple_key in random_flax_state_dict:
            if pt_tensor.shape != random_flax_state_dict[pt_tuple_key].shape:
                raise ValueError(
                    f"PyTorch checkpoint seems to be incorrect. Weight {pt_key} was expected to be of shape "
                    f"{random_flax_state_dict[pt_tuple_key].shape}, but is {pt_tensor.shape}."
                )

        # also add unexpected weights so that a warning is thrown
        flax_state_dict[pt_tuple_key] = jnp.asarray(pt_tensor)

    return unflatten_dict(flax_state_dict)
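
# Note: flatten_dict / unflatten_dict (from flax.traverse_util) convert between nested
# parameter dicts and flat dicts keyed by tuples, which is why the PyTorch keys above are
# handled as tuples. Illustrative only (the nested names are hypothetical):
#   flatten_dict({"decoder": {"conv_in": {"kernel": k}}}) == {("decoder", "conv_in", "kernel"): k}
#   unflatten_dict({("decoder", "conv_in", "kernel"): k}) == {"decoder": {"conv_in": {"kernel": k}}}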


def convert_model(config_path, pt_state_dict_path, save_path):
    config = VQGANConfig.from_pretrained(config_path)
    model = VQModel(config)

    # Load the PyTorch checkpoint and drop the training-loss weights (keys starting
    # with "loss"), which have no counterpart in the Flax model; rename the rest.
    state_dict = torch.load(pt_state_dict_path, map_location="cpu")["state_dict"]
    keys = list(state_dict.keys())
    for key in keys:
        if key.startswith("loss"):
            state_dict.pop(key)
            continue
        renamed_key = rename_key(key)
        state_dict[renamed_key] = state_dict.pop(key)

    state = convert_pytorch_state_dict_to_flax(state_dict, model)
    # convert_pytorch_state_dict_to_flax already returns a nested (unflattened) dict,
    # so assign it directly; calling unflatten_dict on it a second time would mangle the keys.
    model.params = state
    model.save_pretrained(save_path)
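

# Example invocation (a sketch; all paths are placeholders, not part of the original script):
#   convert_model(
#       config_path="path/to/vqgan_config",       # readable by VQGANConfig.from_pretrained
#       pt_state_dict_path="path/to/last.ckpt",   # torch checkpoint containing a "state_dict" entry
#       save_path="path/to/flax-vqgan",           # output directory for the converted Flax model
#   )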