num_latents=256, # they still had to use a fair number of latents for good results (256), in line with the Perceiver line of papers from DeepMind
learned_sinusoidal_dim=16,
latent_token_time_cond=False, # whether to use 1 latent token as time conditioning, or do it the adaptive layernorm way (which is highly effective, as shown by other papers such as "Paella" by Dominic Rampas et al.)
+ dual_patchnorm=True,
**attn_kwargs
):
super().__init__()
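The latent_token_time_cond flag above switches between feeding the time embedding in as one extra latent token and modulating the latents with adaptive layer norm. Below is a minimal sketch of the adaptive layernorm path, assuming the time embedding is already available as a vector; the module and argument names (AdaptiveLayerNorm, time_emb) and all sizes are illustrative, not the ones used in this file.

import torch
from torch import nn

class AdaptiveLayerNorm(nn.Module):
    # LayerNorm whose scale and shift are predicted from the time embedding
    def __init__(self, dim, time_dim):
        super().__init__()
        self.norm = nn.LayerNorm(dim, elementwise_affine=False)
        self.to_scale_shift = nn.Sequential(nn.SiLU(), nn.Linear(time_dim, dim * 2))
        # start as an identity modulation, so training begins from plain LayerNorm
        nn.init.zeros_(self.to_scale_shift[-1].weight)
        nn.init.zeros_(self.to_scale_shift[-1].bias)

    def forward(self, x, time_emb):
        # x: (batch, tokens, dim), time_emb: (batch, time_dim)
        scale, shift = self.to_scale_shift(time_emb).chunk(2, dim=-1)
        return self.norm(x) * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)

# usage with made-up sizes: 256 latents of width 512, time embedding of width 128
# out = AdaptiveLayerNorm(512, 128)(torch.randn(2, 256, 512), torch.randn(2, 128))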
@@ -378,9 +382,11 @@ def __init__(
# pixels to patch and back
- self.to_patches = nn.Sequential(
+ self.to_patches = Sequential(
Rearrange('b c (h p1) (w p2) -> b (h w) (c p1 p2)', p1=patch_size, p2=patch_size),
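With dual_patchnorm=True, the pixels-to-patch projection gets a layer norm both before and after the linear patch embedding, following the Dual PatchNorm idea. The sketch below makes two assumptions: the custom Sequential is taken to be a small helper that drops None entries so the optional norms can be toggled inline, and all sizes are made up for illustration; it is not the file's exact code.

import torch
from torch import nn
from einops.layers.torch import Rearrange

def Sequential(*modules):
    # assumed helper: skip None entries so optional layers can be written inline
    return nn.Sequential(*[m for m in modules if m is not None])

dual_patchnorm = True
patch_size, channels, dim = 4, 3, 256                # illustrative sizes
patch_dim = channels * patch_size ** 2

to_patches = Sequential(
    Rearrange('b c (h p1) (w p2) -> b (h w) (c p1 p2)', p1=patch_size, p2=patch_size),
    nn.LayerNorm(patch_dim) if dual_patchnorm else None,   # norm on raw patch pixels
    nn.Linear(patch_dim, dim),
    nn.LayerNorm(dim) if dual_patchnorm else None,          # norm on patch embeddings
)

patches = to_patches(torch.randn(1, channels, 32, 32))      # -> (1, 64, 256)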