diff --git a/nodes.py b/nodes.py
index a4c51db..43c3d82 100644
--- a/nodes.py
+++ b/nodes.py
@@ -12,12 +12,17 @@
 import folder_paths
 
 from .ops import GGMLTensor, GGMLOps
+from .dequant import dequantize_tensor
 
-# Add a custom key for files ending in .gguf
+# Add custom keys for files ending in .gguf
 if "unet_gguf" not in folder_paths.folder_names_and_paths:
     orig = folder_paths.folder_names_and_paths.get("diffusion_models", folder_paths.folder_names_and_paths.get("unet", [[], set()]))
     folder_paths.folder_names_and_paths["unet_gguf"] = (orig[0], {".gguf"})
 
+if "clip_gguf" not in folder_paths.folder_names_and_paths:
+    orig = folder_paths.folder_names_and_paths.get("clip", [[], set()])
+    folder_paths.folder_names_and_paths["clip_gguf"] = (orig[0], {".gguf"})
+
 def gguf_sd_loader(path):
     """
     Read state dict as fake tensors
@@ -42,6 +47,68 @@ def gguf_sd_loader(path):
     print("\n")
     return sd
 
+# for remapping llama.cpp -> original key names
+clip_sd_map = {
+    "enc.": "encoder.",
+    ".blk.": ".block.",
+    "token_embd": "shared",
+    "output_norm": "final_layer_norm",
+    "attn_q": "layer.0.SelfAttention.q",
+    "attn_k": "layer.0.SelfAttention.k",
+    "attn_v": "layer.0.SelfAttention.v",
+    "attn_o": "layer.0.SelfAttention.o",
+    "attn_norm": "layer.0.layer_norm",
+    "attn_rel_b": "layer.0.SelfAttention.relative_attention_bias",
+    "ffn_up": "layer.1.DenseReluDense.wi_1",
+    "ffn_down": "layer.1.DenseReluDense.wo",
+    "ffn_gate": "layer.1.DenseReluDense.wi_0",
+    "ffn_norm": "layer.1.layer_norm",
+}
+# weights that should be dequantized on load
+clip_sd_dequant = {
+    "shared.weight",
+}
+
+def gguf_clip_loader(path):
+    raw_sd = gguf_sd_loader(path)
+    assert "enc.blk.23.ffn_up.weight" in raw_sd, "Invalid Text Encoder!"
+    sd = {}
+    for k, v in raw_sd.items():
+        for s, d in clip_sd_map.items():
+            k = k.replace(s, d)
+        if k in clip_sd_dequant:
+            v = dequantize_tensor(v, torch.float32).to(torch.float16)
+            v = GGMLTensor(v, tensor_type=gguf.GGMLQuantizationType.F16, tensor_shape=v.shape)
+        sd[k] = v
+    return sd
+
+# TODO: Temporary fix for now
+class GGUFModelPatcher(comfy.model_patcher.ModelPatcher):
+    def calculate_weight(self, patches, weight, key):
+        if isinstance(weight, GGMLTensor):
+            qtype = weight.tensor_type
+            # TODO: don't even store these in a custom format
+            if qtype in [gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16]:
+                return super().calculate_weight(patches, weight, key)
+            else:
+                weight.patches.append((super().calculate_weight, patches, key))
+                return weight
+        else:
+            return super().calculate_weight(patches, weight, key)
+
+    def clone(self, *args, **kwargs):
+        n = GGUFModelPatcher(self.model, self.load_device, self.offload_device, self.size, weight_inplace_update=self.weight_inplace_update)
+        n.patches = {}
+        for k in self.patches:
+            n.patches[k] = self.patches[k][:]
+        n.patches_uuid = self.patches_uuid
+
+        n.object_patches = self.object_patches.copy()
+        n.model_options = copy.deepcopy(self.model_options)
+        n.backup = self.backup
+        n.object_patches_backup = self.object_patches_backup
+        return n
+
 class UnetLoaderGGUF:
     @classmethod
     def INPUT_TYPES(s):
@@ -69,33 +136,117 @@ def load_unet(self, unet_name):
         model = GGUFModelPatcher.clone(model)
         return (model,)
 
-# TODO: Temporary fix for now
-class GGUFModelPatcher(comfy.model_patcher.ModelPatcher):
-    def calculate_weight(self, patches, weight, key):
-        if isinstance(weight, GGMLTensor):
-            qtype = weight.tensor_type
-            # TODO: don't even store these in a custom format
-            if qtype in [gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16]:
-                return super().calculate_weight(patches, weight, key)
+clip_name_dict = {
+    "stable_diffusion": comfy.sd.CLIPType.STABLE_DIFFUSION,
+    "stable_cascade": comfy.sd.CLIPType.STABLE_CASCADE,
+    "stable_audio": comfy.sd.CLIPType.STABLE_AUDIO,
+    "sdxl": comfy.sd.CLIPType.STABLE_DIFFUSION,
+    "sd3": comfy.sd.CLIPType.SD3,
+    "flux": comfy.sd.CLIPType.FLUX,
+}
+
+class CLIPLoaderGGUF:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "clip_name": (s.get_filename_list(),),
+                "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio"],),
+            }
+        }
+
+    RETURN_TYPES = ("CLIP",)
+    FUNCTION = "load_clip"
+    CATEGORY = "bootleg"
+    TITLE = "CLIPLoader (GGUF)"
+
+    @classmethod
+    def get_filename_list(s):
+        files = []
+        files += folder_paths.get_filename_list("clip")
+        files += folder_paths.get_filename_list("clip_gguf")
+        return sorted(files)
+
+    def load_data(self, ckpt_paths):
+        clip_data = []
+        for p in ckpt_paths:
+            if p.endswith(".gguf"):
+                clip_data.append(gguf_clip_loader(p))
             else:
-                weight.patches.append((super().calculate_weight, patches, key))
-                return weight
-        else:
-            return super().calculate_weight(patches, weight, key)
+                sd = comfy.utils.load_torch_file(p, safe_load=True)
+                clip_data.append(
+                    {k: GGMLTensor(v, tensor_type=gguf.GGMLQuantizationType.F16, tensor_shape=v.shape) for k, v in sd.items()}
+                )
+        return clip_data
 
-    def clone(self, *args, **kwargs):
-        n = GGUFModelPatcher(self.model, self.load_device, self.offload_device, self.size, weight_inplace_update=self.weight_inplace_update)
-        n.patches = {}
-        for k in self.patches:
-            n.patches[k] = self.patches[k][:]
-        n.patches_uuid = self.patches_uuid
+    def load_patcher(self, clip_paths, clip_type, clip_data):
+        clip = comfy.sd.load_text_encoder_state_dicts(
+            clip_type = clip_type,
+            state_dicts = clip_data,
+            model_options = {"custom_operations": GGMLOps},
+            embedding_directory = folder_paths.get_folder_paths("embeddings"),
+        )
+        clip.patcher = GGUFModelPatcher.clone(clip.patcher)
 
-        n.object_patches = self.object_patches.copy()
-        n.model_options = copy.deepcopy(self.model_options)
-        n.backup = self.backup
-        n.object_patches_backup = self.object_patches_backup
-        return n
+        # for some reason this is just missing in some SAI checkpoints
+        if hasattr(clip.cond_stage_model, "clip_l"):
+            if clip.cond_stage_model.clip_l.transformer.text_projection.weight.tensor_shape is None:
+                clip.cond_stage_model.clip_l.transformer.text_projection = comfy.ops.manual_cast.Linear(768, 768)
+        if hasattr(clip.cond_stage_model, "clip_g"):
+            if clip.cond_stage_model.clip_g.transformer.text_projection.weight.tensor_shape is None:
+                clip.cond_stage_model.clip_g.transformer.text_projection = comfy.ops.manual_cast.Linear(1280, 1280)
+
+        return clip
+
+    def load_clip(self, clip_name, type="stable_diffusion"):
+        clip_path = folder_paths.get_full_path("clip", clip_name)
+        clip_type = clip_name_dict.get(type, comfy.sd.CLIPType.STABLE_DIFFUSION)
+        return (self.load_patcher([clip_path], clip_type, self.load_data([clip_path])),)
+
+class DualCLIPLoaderGGUF(CLIPLoaderGGUF):
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "clip_name1": (s.get_filename_list(),),
+                "clip_name2": (s.get_filename_list(),),
+                "type": (["sdxl", "sd3", "flux"],),
+            }
+        }
+
+    TITLE = "DualCLIPLoader (GGUF)"
+
+    def load_clip(self, clip_name1, clip_name2, type):
+        clip_path1 = folder_paths.get_full_path("clip", clip_name1)
+        clip_path2 = folder_paths.get_full_path("clip", clip_name2)
+        clip_paths = [clip_path1, clip_path2]
+        clip_type = clip_name_dict.get(type, comfy.sd.CLIPType.STABLE_DIFFUSION)
+        return (self.load_patcher(clip_paths, clip_type, self.load_data(clip_paths)),)
+
+class TripleCLIPLoaderGGUF(CLIPLoaderGGUF):
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "clip_name1": (s.get_filename_list(),),
+                "clip_name2": (s.get_filename_list(),),
+                "clip_name3": (s.get_filename_list(),),
+            }
+        }
+
+    TITLE = "TripleCLIPLoader (GGUF)"
+
+    def load_clip(self, clip_name1, clip_name2, clip_name3, type="sd3"):
+        clip_path1 = folder_paths.get_full_path("clip", clip_name1)
+        clip_path2 = folder_paths.get_full_path("clip", clip_name2)
+        clip_path3 = folder_paths.get_full_path("clip", clip_name3)
+        clip_paths = [clip_path1, clip_path2, clip_path3]
+        clip_type = clip_name_dict.get(type, comfy.sd.CLIPType.STABLE_DIFFUSION)
+        return (self.load_patcher(clip_paths, clip_type, self.load_data(clip_paths)),)
 
 NODE_CLASS_MAPPINGS = {
     "UnetLoaderGGUF": UnetLoaderGGUF,
+    "CLIPLoaderGGUF": CLIPLoaderGGUF,
+    "DualCLIPLoaderGGUF": DualCLIPLoaderGGUF,
+    "TripleCLIPLoaderGGUF": TripleCLIPLoaderGGUF,
 }
diff --git a/ops.py b/ops.py
index 4358cce..d214fc6 100644
--- a/ops.py
+++ b/ops.py
@@ -32,6 +32,13 @@ def clone(self, *args, **kwargs):
     def detach(self, *args, **kwargs):
         return self
 
+    def copy_(self, *args, **kwargs):
+        # fixes .weight.copy_ in comfy/clip_model/CLIPTextModel
+        try:
+            return super().copy_(*args, **kwargs)
+        except Exception as e:
+            print(f"ignoring 'copy_' on tensor: {e}")
+
     @property
     def shape(self):
         if not hasattr(self, "tensor_shape"):
@@ -44,7 +51,7 @@ class GGMLLayer(torch.nn.Module):
     """
     def __init__(self, *args, **kwargs):
        super().__init__()
-        self.weight = None
+        self.weight = GGMLTensor(1, tensor_type=None, tensor_shape=None)
         self.bias = None
 
     def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
@@ -119,7 +126,7 @@ def get_weights(self, dtype=torch.float16):
         bias = self.get_weight(self.bias, dtype)
         return (weight, bias)
 
-class GGMLOps(comfy.ops.disable_weight_init):
+class GGMLOps(comfy.ops.manual_cast):
     """
     Dequantize weights on the fly before doing the compute
     """
@@ -142,4 +149,4 @@ def forward(self, x):
 
         if device:
             self.to(device)
-        return x
\ No newline at end of file
+        return x
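
Note on the key remapping in gguf_clip_loader above: the loader walks clip_sd_map and applies plain substring substitution to turn llama.cpp-style T5 tensor names into the transformers-style keys ComfyUI expects. A minimal, self-contained sketch of that loop (the three-entry table below is an assumed subset of the full clip_sd_map from nodes.py; the example key is the one the loader itself asserts on):

    # Assumed subset of clip_sd_map; see nodes.py above for the full table.
    clip_sd_map = {
        "enc.": "encoder.",
        ".blk.": ".block.",
        "ffn_up": "layer.1.DenseReluDense.wi_1",
    }

    key = "enc.blk.23.ffn_up.weight"
    for src, dst in clip_sd_map.items():
        # str.replace is a no-op when the substring is absent, so keys that
        # match no entry pass through unchanged
        key = key.replace(src, dst)

    print(key)  # encoder.block.23.layer.1.DenseReluDense.wi_1.weight

Because the substitutions are unanchored, the map entries presumably carry surrounding context (".blk." rather than "blk") to avoid accidental matches inside other key fragments.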