Commit 0c9fb04

ariG23498 and SauravMaheshkar authored on Oct 9, 2024

Adding FLUX porting code (#11)

Co-authored-by: Saurav Maheshkar <[email protected]>

1 parent 9162e4d · commit 0c9fb04
File tree: 8 files changed, +400 −146 lines changed
 

.github/CODEOWNERS
+1 −1

@@ -1 +1 @@
-* @SauravMaheshkar
+* @SauravMaheshkar @ariG23498
jflux/cli.py
+2 −3

@@ -6,13 +6,12 @@

 import jax
 import jax.numpy as jnp
-from flax import nnx
+from einops import rearrange
 from fire import Fire
+from flax import nnx
 from jax.typing import DTypeLike
-
 from PIL import Image

-from einops import rearrange
 from jflux.sampling import denoise, get_noise, get_schedule, prepare, unpack
 from jflux.util import configs, load_ae, load_clip, load_flow_model, load_t5

jflux/modules/conditioner.py
+2 −2

@@ -1,7 +1,7 @@
 # Note: This is a torch module not a Jax module
-from torch import nn
-from chex import Array
 import jax.numpy as jnp
+from chex import Array
+from torch import nn
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
jflux/modules/layers.py
+6 −0

@@ -214,6 +214,7 @@ def __init__(
         self.img_norm1 = nnx.LayerNorm(
             num_features=hidden_size,
             use_scale=False,
+            use_bias=False,
             epsilon=1e-6,
             rngs=rngs,
             param_dtype=param_dtype,
@@ -229,6 +230,7 @@ def __init__(
         self.img_norm2 = nnx.LayerNorm(
             num_features=hidden_size,
             use_scale=False,
+            use_bias=False,
             epsilon=1e-6,
             rngs=rngs,
             param_dtype=param_dtype,
@@ -257,6 +259,7 @@ def __init__(
         self.txt_norm1 = nnx.LayerNorm(
             num_features=hidden_size,
             use_scale=False,
+            use_bias=False,
             epsilon=1e-6,
             rngs=rngs,
             param_dtype=param_dtype,
@@ -272,6 +275,7 @@ def __init__(
         self.txt_norm2 = nnx.LayerNorm(
             num_features=hidden_size,
             use_scale=False,
+            use_bias=False,
             epsilon=1e-6,
             rngs=rngs,
             param_dtype=param_dtype,
@@ -382,6 +386,7 @@ def __init__(
         self.pre_norm = nnx.LayerNorm(
             num_features=hidden_size,
             use_scale=False,
+            use_bias=False,
             epsilon=1e-6,
             rngs=rngs,
             param_dtype=param_dtype,
@@ -419,6 +424,7 @@ def __init__(
         self.norm_final = nnx.LayerNorm(
             num_features=hidden_size,
             use_scale=False,
+            use_bias=False,
             epsilon=1e-6,
             rngs=rngs,
             param_dtype=param_dtype,
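All six hunks make the same change: these nnx.LayerNorm layers already disabled the learned scale, and the commit now also disables the learned bias, so the Flax modules carry no affine parameters, matching the non-affine LayerNorm used in the reference PyTorch FLUX blocks. A minimal sketch of the resulting configuration (the hidden size and input shape below are illustrative, not taken from the diff):

import jax.numpy as jnp
from flax import nnx

# LayerNorm with no learnable parameters: no scale (gamma) and, after this
# commit, no bias (beta) either.
norm = nnx.LayerNorm(
    num_features=3072,  # illustrative hidden size
    use_scale=False,
    use_bias=False,
    epsilon=1e-6,
    rngs=nnx.Rngs(0),
)

x = jnp.ones((1, 16, 3072))
y = norm(x)  # purely normalised activations, no affine transform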

jflux/port.py
+369 −118

Large diffs are not rendered by default.
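The bulk of the commit lives in jflux/port.py, which is collapsed in this view. From the call sites in jflux/util.py below, it exposes at least port_autoencoder(autoencoder, tensors) and port_flux(flux, tensors), which copy a flat dict of checkpoint tensors into the corresponding nnx modules. The actual implementation is not shown here; the following is only a sketch of what porting a single linear layer from a PyTorch-style checkpoint into an nnx.Linear could look like (the helper name and key layout are hypothetical):

import jax.numpy as jnp
from flax import nnx

def port_linear(linear: nnx.Linear, tensors: dict, prefix: str) -> nnx.Linear:
    # Hypothetical helper, not the actual code in jflux/port.py.
    # PyTorch stores Linear weights as (out_features, in_features); flax/nnx
    # kernels are (in_features, out_features), hence the transpose.
    linear.kernel.value = jnp.asarray(tensors[f"{prefix}.weight"]).T
    if f"{prefix}.bias" in tensors and linear.bias is not None:
        linear.bias.value = jnp.asarray(tensors[f"{prefix}.bias"])
    return linear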

jflux/util.py
+10 −10

@@ -1,8 +1,8 @@
 import os
 from dataclasses import dataclass

-import torch # need for t5 and clip
 import jax
+import torch # need for t5 and clip
 from flax import nnx
 from huggingface_hub import hf_hub_download
 from jax import numpy as jnp
@@ -12,8 +12,7 @@
 from jflux.model import Flux, FluxParams
 from jflux.modules.autoencoder import AutoEncoder, AutoEncoderParams
 from jflux.modules.conditioner import HFEmbedder
-
-from port import port_autoencoder
+from jflux.port import port_autoencoder, port_flux


 @dataclass
@@ -128,13 +127,14 @@ def load_flow_model(name: str, hf_download: bool = True) -> Flux:

     model = Flux(params=configs[name].params)

-    # TODO (ariG23498): Port the flux model
     if ckpt_path is not None:
-        print("Loading checkpoint")
-        # load_sft doesn't support torch.device
-        sd = load_sft(ckpt_path)
-        missing, unexpected = model.load_state_dict(sd, strict=False, assign=True)
-        print_load_warning(missing, unexpected)
+        tensors = {}
+        with safe_open(ckpt_path, framework="flax") as f:
+            for k in f.keys():
+                tensors[k] = f.get_tensor(k)
+
+        model = port_flux(flux=model, tensors=tensors)
+        del tensors
     return model


@@ -166,12 +166,12 @@ def load_ae(name: str, hf_download: bool = True) -> AutoEncoder:
     print("Init AE")
     ae = AutoEncoder(params=configs[name].ae_params)

-    # TODO (ariG23498): Port the flux model
     if ckpt_path is not None:
         tensors = {}
         with safe_open(ckpt_path, framework="flax") as f:
             for k in f.keys():
                 tensors[k] = f.get_tensor(k)

         ae = port_autoencoder(autoencoder=ae, tensors=tensors)
+        del tensors
     return ae
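With this change both loaders follow the same pattern: open the safetensors checkpoint with safe_open(..., framework="flax"), copy every tensor into a dict, hand the dict to the matching port_* function, and delete the dict afterwards to free host memory. A usage sketch (the "flux-schnell" config key is an assumption based on the upstream FLUX configs and is not visible in this diff):

from jflux.util import load_ae, load_flow_model

# hf_download=True (the default) fetches the safetensors checkpoints from the
# Hugging Face Hub before the weights are ported into the Flax/nnx modules.
model = load_flow_model("flux-schnell")  # config key assumed, see `configs`
ae = load_ae("flux-schnell")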

tests/modules/test_layers.py
+3 −4

@@ -1,27 +1,26 @@
+import jax
 import jax.numpy as jnp
 import numpy as np
-import jax
 import torch
 from einops import rearrange, repeat
 from flax import nnx
 from flux.modules.layers import DoubleStreamBlock as TorchDoubleStreamBlock
+from flux.modules.layers import EmbedND as TorchEmbedND
 from flux.modules.layers import MLPEmbedder as TorchMLPEmbedder
 from flux.modules.layers import Modulation as TorchModulation
 from flux.modules.layers import QKNorm as TorchQKNorm
 from flux.modules.layers import RMSNorm as TorchRMSNorm
 from flux.modules.layers import SelfAttention as TorchSelfAttention
 from flux.modules.layers import timestep_embedding as torch_timesetp_embedding
-from flux.modules.layers import EmbedND as TorchEmbedND

 from jflux.modules.layers import DoubleStreamBlock as JaxDoubleStreamBlock
+from jflux.modules.layers import EmbedND as JaxEmbedND
 from jflux.modules.layers import MLPEmbedder as JaxMLPEmbedder
 from jflux.modules.layers import Modulation as JaxModulation
 from jflux.modules.layers import QKNorm as JaxQKNorm
 from jflux.modules.layers import RMSNorm as JaxRMSNorm
 from jflux.modules.layers import SelfAttention as JaxSelfAttention
 from jflux.modules.layers import timestep_embedding as jax_timestep_embedding
-from jflux.modules.layers import EmbedND as JaxEmbedND
-
 from tests.utils import torch2jax

tests/test_sampling.py
+7 −8

@@ -1,11 +1,11 @@
-import numpy as np
+import chex
 import jax
+import numpy as np
 import torch
-import chex
-from jflux.sampling import get_noise as jax_get_noise
-
 from flux.sampling import get_noise as torch_get_noise

+from jflux.sampling import get_noise as jax_get_noise
+

 class SamplingTestCase(chex.TestCase):
     def test_get_noise(self):
@@ -22,16 +22,15 @@ def test_get_noise(self):
             num_samples=1,
             height=height,
             width=width,
-            dtype=jax.dtypes.bfloat16,
+            dtype=jax.numpy.float32,
             seed=jax.random.PRNGKey(seed=42),
         )
         x_torch = torch_get_noise(
             num_samples=1,
             height=height,
             width=width,
-            dtype=torch.bfloat16,
+            dtype=torch.float32,
             seed=42,
-            device="cuda",
+            device="cuda" if torch.cuda.is_available() else "cpu",
         )
-        print(x_jax.shape)
         chex.assert_equal_shape([x_jax, x_torch])
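Two tweaks make this test portable: both noise tensors are generated in float32 rather than bfloat16 (likely so the JAX and PyTorch outputs stay easy to compare and convert), and the torch call falls back to the CPU when CUDA is unavailable so the test can run on CPU-only machines. The final assertion only compares shapes, so the two framework tensors never need to share a device or dtype; a standalone sketch of that check:

import chex
import jax
import torch

# chex's shape assertion only inspects `.shape`, so a jax.Array and a
# torch.Tensor can be compared directly without converting either one.
x_jax = jax.random.normal(jax.random.PRNGKey(42), (1, 16, 64))
x_torch = torch.randn(1, 16, 64)
chex.assert_equal_shape([x_jax, x_torch])  # passes: both are (1, 16, 64)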
