0.67

justUmen · Jan 23, 2025 · 8a75181 · 8a75181
1 parent 4a9c7c8
commit 8a75181
Show file tree

Hide file tree

Showing 7 changed files with 181 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-# 🔗 Comfyui : Bjornulf_custom_nodes v0.66 🔗
+# 🔗 Comfyui : Bjornulf_custom_nodes v0.67 🔗
 
-A list of 119 custom nodes for Comfyui : Display, manipulate, create and edit text, images, videos, loras, generate characters and more.  
+A list of 120 custom nodes for Comfyui : Display, manipulate, create and edit text, images, videos, loras, generate characters and more.  
 You can manage looping operations, generate randomized content, trigger logical conditions, pause and manually control your workflows and even work with external AI tools, like Ollama or Text To Speech.  
 
 # Coffee : ☕☕☕☕☕ 5/5
@@ -192,6 +192,7 @@ Support me and my work : ❤️❤️❤️ <https://ko-fi.com/bjornulf> ❤️
 `59.` [📹🔊 Combine Video + Audio](#59----combine-video--audio)  
 `66.` [🔊➜📝 STT - Speech to Text](#66----stt---speech-to-text)  
 `118.` [🔊 TTS Configuration ⚙](#118----tts-configuration-)  
+`120.` [📝➜🔊 Kokoro - Text to Speech](#120----kokoro---text-to-speech)  
 
 ## 💻 System 💻
 `34.` [🧹 Free VRAM hack](#34----free-vram-hack)  
@@ -360,6 +361,7 @@ cd /where/you/installed/ComfyUI && python main.py
 - **0.64**: remove "import wget", added some keywords to text generators.
 - **0.65**: ❗Breaking changes : Combine Text inputs are now all optional (PLease remake your nodes, sorry.) Add 6 new nodes : any2int, any2float, load text from folder, load text from path, load lora from path. Also upgraded the Save text node.
 - **0.66**: Add lora hunyuan CIVIT ai + download, add TTS configuration node, edit requirements.txt
+- **0.67**: Add kokoro TTS node.
 
 # 📝 Nodes descriptions
 
@@ -1679,4 +1681,12 @@ Take a CivitAI Lora to use with Hunyuan. (NSFW list not on github of course.)
 
 The workflow below is included : `workflows/HUNYUAN_basic_lora.json`) :  
 
-![hunyuan lora](screenshots/hunyuan_lora.png)  
+![hunyuan lora](screenshots/hunyuan_lora.png)  
+
+#### 120 - 📝➜🔊 Kokoro - Text to Speech
+
+**Description:**  
+Another Text to Speech node, based on Kokoro: https://github.com/thewh1teagle/kokoro-onnx  
+Lightweight, much simpler, no configuration and fully integrated into Comfyui. (No external backend to run.)  
+
+![tts kokoro](screenshots/kokoro_tts.png)  
diff --git a/__init__.py b/__init__.py
@@ -94,10 +94,12 @@
 from .load_text import LoadTextFromFolder, LoadTextFromPath
 from .string_splitter import TextSplitin5
 from .line_selector import LineSelector
+from .text_to_speech_kokoro import KokoroTTS
 # from .text_generator_t2v import TextGeneratorText2Video
 NODE_CLASS_MAPPINGS = {
     "Bjornulf_LineSelector": LineSelector,
     "Bjornulf_XTTSConfig": XTTSConfig,
+    "Bjornulf_KokoroTTS": KokoroTTS,
     # "Bjornulf_TextGeneratorText2Video": TextGeneratorText2Video,
     "Bjornulf_LatentResolutionSelector": LatentResolutionSelector,
     "Bjornulf_LoaderLoraWithPath": LoaderLoraWithPath,
@@ -223,10 +225,13 @@
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
+    "Bjornulf_XTTSConfig": "🔊 TTS Configuration ⚙",
+    "Bjornulf_TextToSpeech": "📝➜🔊 TTS - Text to Speech",
     # "Bjornulf_HiResFix": "HiResFix",
     # "Bjornulf_ImageBlend": "🎨 Image Blend",
     # "Bjornulf_APIHiResCivitAI": "🎨➜🎨 API Image hires fix (CivitAI)",
     # "Bjornulf_CivitAILoraSelector": "lora Civit",
+    "Bjornulf_KokoroTTS": "📝➜🔊 Kokoro - Text to Speech",
     "Bjornulf_LineSelector": "📝👈 Line selector (🎲 Or random)",
     "Bjornulf_LoaderLoraWithPath": "📥👑 Load Lora with Path",
     # "Bjornulf_TextGeneratorText2Video": "🔥📝📹 Text Generator for text to video 📹📝🔥",

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "bjornulf_custom_nodes"
-description = "116 ComfyUI nodes : Display, manipulate, and edit text, images, videos, loras, generate characters and more. Manage looping operations, generate randomized content, use logical conditions and work with external AI tools, like Ollama or Text To Speech."
-version = "0.66"
+description = "120 ComfyUI nodes : Display, manipulate, and edit text, images, videos, loras, generate characters and more. Manage looping operations, generate randomized content, use logical conditions and work with external AI tools, like Ollama or Text To Speech Kokoro, etc..."
+version = "0.67"
 license = {file = "LICENSE"}
 
 [project.urls]

diff --git a/requirements.txt b/requirements.txt
@@ -5,4 +5,5 @@ faster_whisper
 ffmpeg-python
 civitai-py
 fal_client
-importlib
+sounddevice
+kokoro_onnx
diff --git a/screenshots/kokoro_tts.png b/screenshots/kokoro_tts.png
diff --git a/text_to_speech_kokoro.py b/text_to_speech_kokoro.py
@@ -0,0 +1,145 @@
+import os
+import requests
+import random
+
+# Kokoro voice id -> human-readable label.
+# The labels (values) are what the node's "voice" dropdown offers; the ids
+# (keys) are what gets passed to the Kokoro engine as the voice name.
+VOICE_OPTIONS = {
+    "af_bella": "Bella (American Female) - af_bella",
+    "af_nicole": "Nicole (American Female) - af_nicole",
+    "af_sarah": "Sarah (American Female) - af_sarah",
+    "af_sky": "Sky (American Female) - af_sky",
+    "af": "Default (American Female) - af",
+    "am_adam": "Adam (American Male) - am_adam",
+    "am_michael": "Michael (American Male) - am_michael",
+    "bf_emma": "Emma (British Female) - bf_emma",
+    "bf_isabella": "Isabella (British Female) - bf_isabella",
+    "bm_george": "George (British Male) - bm_george",
+    "bm_lewis": "Lewis (British Male) - bm_lewis"
+}
+
+# Inverse of VOICE_OPTIONS (label -> voice id), used to translate the label
+# selected in the dropdown back into the id. Labels must stay unique or
+# entries would silently collapse here.
+VOICE_DISPLAY_TO_VALUE = {v: k for k, v in VOICE_OPTIONS.items()}
+
+# Language code -> human-readable name.
+# Only the codes (keys) are offered in the node's "language" dropdown.
+LANGUAGE_OPTIONS = {
+    "en-us": "English (US)",
+    "en-gb": "English (UK)",
+    "fr-fr": "French",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "cmn": "Chinese (Mandarin)"
+}
+
+def download_if_not_exists(url, dest_path):
+    """Download a file from a URL if it doesn't already exist."""
+    if not os.path.exists(dest_path):
+        print(f"Downloading {os.path.basename(dest_path)}...")
+        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        with open(dest_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print(f"Downloaded {os.path.basename(dest_path)}")
+
+class KokoroTTS:
+    BASE_DIR = "Bjornulf/Kokoro"
+    MODEL_FILE = os.path.join(BASE_DIR, "kokoro-v0_19.onnx")
+    VOICES_FILE = os.path.join(BASE_DIR, "voices.bin")
+
+    VOICE_LANGUAGES = {
+        'af': 'en-us', 'am': 'en-us', 'bf': 'en-gb', 'bm': 'en-gb'
+    }
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "text": ("STRING", {"multiline": True}),
+                "voice": (list(VOICE_OPTIONS.values()), {"default": "Default (American Female) - af"}),
+                "language": (list(LANGUAGE_OPTIONS.keys()), {"default": "en-us"}),
+                "speed": ("FLOAT", {"default": 1.0, "min": 0.5, "max": 2.0, "step": 0.1}),
+                "autoplay": ("BOOLEAN", {"default": True}),
+                "save_audio": ("BOOLEAN", {"default": True}),
+                "overwrite": ("BOOLEAN", {"default": False}),
+                "seed": ("INT", {"default": 0}),
+            }
+        }
+
+    RETURN_TYPES = ("AUDIO",)
+    FUNCTION = "generate_audio"
+    CATEGORY = "Bjornulf/Kokoro"
+
+    def generate_audio(self, text: str, voice: str, language: str, speed: float,
+                      autoplay: bool, save_audio: bool, 
+                      overwrite: bool, seed: int):
+        random.seed(seed)
+
+        config = {
+            "model_path": self.MODEL_FILE,
+            "voices_path": self.VOICES_FILE,
+            "speed": speed,
+            "language": language
+        }
+
+        download_if_not_exists(
+            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
+            config["model_path"]
+        )
+        download_if_not_exists(
+            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
+            config["voices_path"]
+        )
+
+        try:
+            from kokoro_onnx import Kokoro
+            import soundfile as sf
+            import torch
+            import numpy as np
+            from pydub import AudioSegment
+            from pydub.playback import play
+
+            voice_id = VOICE_DISPLAY_TO_VALUE[voice]
+            kokoro = Kokoro(config["model_path"], config["voices_path"])
+
+            # Check if file exists and overwrite is False
+            sanitized_text = ''.join(c if c.isalnum() else '_' for c in text[:50])
+            save_path = os.path.join("Bjornulf_TTS_Kokoro", voice_id, f"{sanitized_text}.wav")
+            full_path = os.path.abspath(save_path)
+
+            if os.path.exists(full_path) and not overwrite:
+                print(f"File exists: {full_path}. Loading existing audio.")
+                samples, sample_rate = sf.read(full_path)
+                if autoplay:
+                    audio_segment = AudioSegment.from_file(full_path)
+                    play(audio_segment)
+            else:
+                # Generate new audio
+                samples, sample_rate = kokoro.create(
+                    text,
+                    voice=voice_id,
+                    speed=config["speed"],
+                    lang=language
+                )
+
+                if save_audio:
+                    os.makedirs(os.path.dirname(full_path), exist_ok=True)
+                    sf.write(full_path, samples, sample_rate)
+
+                if autoplay:
+                    try:
+                        audio_segment = AudioSegment(
+                            samples.tobytes(), 
+                            frame_rate=sample_rate,
+                            sample_width=samples.dtype.itemsize, 
+                            channels=1
+                        )
+                        play(audio_segment)
+                    except Exception as e:
+                        print(f"Autoplay error: {e}")
+
+            audio_tensor = torch.from_numpy(samples).unsqueeze(0)
+            audio_output = {"waveform": audio_tensor.unsqueeze(0), "sample_rate": sample_rate}
+            return (audio_output,)
+
+        except Exception as e:
+            print(f"Error in Kokoro TTS: {e}")
+            return ({"waveform": torch.zeros(1, 1, 1), "sample_rate": 22050},)
diff --git a/web/js/text_to_speech_kokoro.js b/web/js/text_to_speech_kokoro.js
@@ -0,0 +1,14 @@
+import { app } from "../../../scripts/app.js";
+
+app.registerExtension({
+    name: "Bjornulf.KokoroTTS",
+    async nodeCreated(node) {
+        if (node.comfyClass === "Bjornulf_KokoroTTS") {
+            // Set seed widget to hidden input
+            const seedWidget = node.widgets.find((w) => w.name === "seed");
+            if (seedWidget) {
+              seedWidget.type = "HIDDEN";
+            }
+        }
+    }
+});