evandempsey · evandempsey · Nov 22, 2024 · Nov 21, 2024 · Nov 22, 2024 · Nov 22, 2024
diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@ An intelligent system that automatically generates engaging podcast conversation
 - High-quality text-to-speech synthesis using Google Cloud or ElevenLabs
 - Checkpoint system to save progress and resume generation
 - Configurable voices and audio settings
+- Gradio web interface for configuring and running podcast generation
 
 ## Examples
 
@@ -48,6 +49,10 @@ Listen to sample podcasts generated using Podcast-LLM:
 [![Play Podcast Sample](https://img.shields.io/badge/Play%20Podcast-brightgreen?style=for-the-badge&logo=soundcloud)](https://soundcloud.com/evan-dempsey-153309617/robotic-process-automation-google-voices)
 
 
+## Web Interface
+
+![Gradio Web Interface](assets/images/gradio_ui.png)
+
 ## Installation
 
 1. Install using pip:
@@ -92,6 +97,21 @@ Listen to sample podcasts generated using Podcast-LLM:
 
 3. Customize voices and other settings in `config/config.yaml`
 
+4. Launch the Gradio web interface:
+   ```bash
+   # Start the web UI
+   podcast-llm-gui
+   ```
+
+   This launches a user-friendly web interface where you can:
+   - Enter a podcast topic
+   - Choose between research and context modes
+   - Upload source files and URLs for context mode
+   - Configure Q&A rounds and checkpointing
+   - Specify output paths for text and audio
+   - Monitor generation progress in real-time
+
+
 ## License
 
 This project is licensed under Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)

diff --git a/assets/images/gradio_ui.png b/assets/images/gradio_ui.png
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -23,6 +23,7 @@ Welcome to Podcast LLM's documentation!
    modules/extractors_web
    modules/extractors_youtube
    modules/generate
+   modules/gui
    modules/models
    modules/outline
    modules/research

diff --git a/docs/source/modules/gui.rst b/docs/source/modules/gui.rst
@@ -0,0 +1,7 @@
+podcast_llm.gui
+===============
+
+.. automodule:: podcast_llm.gui
+   :members:
+   :undoc-members:
+   :show-inheritance: 
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
@@ -32,7 +32,27 @@ Here's how to use the Podcast LLM system:
       # Generate Markdown output
       podcast-llm "Machine Learning" --text-output podcast.md
 
+
 Configuration
 ------------
 
 The system can be configured using the ``config.yaml`` file:
+
+
+Launching the Web Interface
+---------------------------
+
+You can launch the Gradio web interface using:
+
+.. code-block:: bash
+
+   podcast-llm-gui
+
+This launches a user-friendly web interface where you can:
+
+- Enter a podcast topic
+- Choose between research and context modes
+- Upload source files and URLs for context mode
+- Configure Q&A rounds and checkpointing
+- Specify output paths for text and audio
+- Monitor generation progress in real-time
diff --git a/podcast_llm/config/logging_config.py b/podcast_llm/config/logging_config.py
@@ -22,13 +22,14 @@
 from typing import Optional
 
 
-def setup_logging(log_level: Optional[int] = None) -> None:
+def setup_logging(log_level: Optional[int] = None, output_file: Optional[str] = None) -> None:
     """
     Set up standardized logging configuration for the podcast generation system.
 
     Args:
         log_level: Optional logging level to set. If None, defaults to INFO.
             Use logging.DEBUG for debug output.
+        output_file: Optional file path to write logs to. If None, logs to stdout.
 
     The format for log messages is:
     YYYY-MM-DD HH:MM:SS - LEVEL - MESSAGE
@@ -38,8 +39,12 @@ def setup_logging(log_level: Optional[int] = None) -> None:
     for handler in root_logger.handlers[:]:
         root_logger.removeHandler(handler)
 
-    # Configure logging to output to stdout
-    handler = logging.StreamHandler(sys.stdout)
+    # Configure logging handler based on output destination
+    if output_file:
+        handler = logging.FileHandler(output_file)
+    else:
+        handler = logging.StreamHandler(sys.stdout)
+
     handler.setFormatter(logging.Formatter(
         '%(asctime)s - %(levelname)s - %(message)s',
         datefmt='%Y-%m-%d %H:%M:%S'

diff --git a/podcast_llm/extractors/youtube.py b/podcast_llm/extractors/youtube.py
@@ -71,10 +71,14 @@ def _extract_video_id(self) -> str:
         # Handle youtube.com URLs
         if 'v=' in self.src:
             return self.src.split('v=')[1].split('&')[0]
-            
+
         # Handle embed URLs
         if 'embed/' in self.src:
             return self.src.split('embed/')[-1].split('?')[0]
+
+        # Handle Shorts URLs
+        if 'shorts/' in self.src:
+            return self.src.split('shorts/')[-1].split('?')[0]
 
         # If no URL patterns match, assume src is already a video ID
         return self.src

diff --git a/podcast_llm/generate.py b/podcast_llm/generate.py
@@ -55,7 +55,8 @@ def generate(
     audio_output: Optional[str] = None,
     text_output: Optional[str] = None,
     config: str = DEFAULT_CONFIG_PATH,
-    debug: bool = False
+    debug: bool = False,
+    log_file: Optional[str] = None
 ) -> None:
     """
     Generate a podcast episode.
@@ -70,9 +71,10 @@ def generate(
         text_output: Path to save text output
         config: Path to config file
         debug: Whether to enable debug logging
+        log_file: Optional path of a file to write log output to instead of stdout
     """
     log_level = logging.DEBUG if debug else logging.INFO
-    setup_logging(log_level)
+    setup_logging(log_level, output_file=log_file)
 
     config = PodcastConfig.load(yaml_path=config)
 

diff --git a/podcast_llm/gui.py b/podcast_llm/gui.py
@@ -0,0 +1,208 @@
+"""
+Graphical user interface module for podcast generation.
+
+This module provides a web-based GUI using Gradio for generating podcasts. It allows users
+to interactively specify podcast generation parameters including:
+
+- Topic selection
+- Operation mode (research or context-based)
+- Source materials (files and URLs) for context mode
+- Number of Q&A rounds
+- Checkpointing preferences
+- Custom configuration
+- Output paths for text and audio
+
+The GUI provides a user-friendly way to access the podcast generation functionality
+without needing to use the command line interface.
+
+The module handles form submission, input validation, logging setup, and coordinates
+with the core generation functionality. It uses temporary files for logging and
+provides real-time feedback during the generation process.
+"""
+
+
+import logging
+import os
+from pathlib import Path
+import tempfile
+
+import gradio as gr
+from gradio_log import Log
+
+from .config.logging_config import setup_logging
+from .generate import generate
+
+PACKAGE_ROOT = Path(__file__).parent
+DEFAULT_CONFIG_PATH = os.path.join(PACKAGE_ROOT, 'config', 'config.yaml')
+
+temp_log_file = tempfile.NamedTemporaryFile(mode='w', delete=False).name
+
+
+def submit_handler(
+    topic: str,
+    mode_of_operation: str,
+    source_files: list[str],
+    source_urls: str,
+    qa_rounds: int,
+    use_checkpoints: bool,
+    custom_config_file: str | None,
+    text_output: str,
+    audio_output: str
+) -> None:
+    """
+    Handle a click on the form's submit button by running podcast generation.
+
+    Routes logging to the module's temporary log file, logs every received value,
+    normalises the inputs, and then calls generate() with the result.
+
+    Args:
+        topic: The podcast topic; surrounding whitespace is stripped
+        mode_of_operation: Either 'research' or 'context' mode
+        source_files: List of source file paths to use as context; a falsy value means no files
+        source_urls: Newline-separated URLs; only http:// and https:// lines are kept
+        qa_rounds: Number of Q&A rounds per section
+        use_checkpoints: Whether to enable checkpointing
+        custom_config_file: Optional path to custom config file; falls back to DEFAULT_CONFIG_PATH
+        text_output: Path to save text output; a blank value is passed on as None
+        audio_output: Path to save audio output; a blank value is passed on as None
+
+    Returns:
+        None
+    """
+    setup_logging(log_level=logging.INFO, output_file=temp_log_file)
+    # Log the value and type of every argument received from the form
+    logging.info(f'Topic: {topic} (type: {type(topic)})')
+    logging.info(f'Mode of Operation: {mode_of_operation} (type: {type(mode_of_operation)})')
+    logging.info(f'Source Files: {source_files} (type: {type(source_files)})')
+    logging.info(f'Source URLs: {source_urls} (type: {type(source_urls)})')
+    logging.info(f'QA Rounds: {qa_rounds} (type: {type(qa_rounds)})')
+    logging.info(f'Use Checkpoints: {use_checkpoints} (type: {type(use_checkpoints)})')
+    logging.info(f'Custom Config File: {custom_config_file} (type: {type(custom_config_file)})')
+    logging.info(f'Text Output: {text_output} (type: {type(text_output)})')
+    logging.info(f'Audio Output: {audio_output} (type: {type(audio_output)})')
+
+    text_output_file = text_output.strip() if text_output.strip() else None
+    audio_output_file = audio_output.strip() if audio_output.strip() else None
+
+    # One URL per line; drop blank lines and anything that is not an http(s) URL
+    source_urls_list = [
+        url.strip() 
+        for url in source_urls.strip().split('\n') 
+        if url.strip().startswith(('http://', 'https://'))
+    ]
+
+    # Merge uploaded files with URLs; an empty result is passed on as None
+    sources = (source_files or []) + source_urls_list
+    sources = sources if sources else None
+
+    generate(
+        topic=topic.strip(),
+        mode=mode_of_operation,
+        sources=sources,
+        qa_rounds=qa_rounds,
+        use_checkpoints=use_checkpoints,
+        audio_output=audio_output_file,
+        text_output=text_output_file,
+        config=custom_config_file if custom_config_file else DEFAULT_CONFIG_PATH,
+        debug=False,
+        log_file=temp_log_file
+    )
+
+def main():
+    """
+    Main entry point for the Gradio web interface.
+
+    Builds a gr.Blocks layout for the podcast generation system and launches it.
+    The layout contains, in order:
+    - Topic input and number of Q&A rounds
+    - Mode selection (research vs context)
+    - Source file and URL inputs for context mode
+    - Behaviour options: checkpointing and an optional config file
+    - Output path inputs for text and audio
+
+    Clicking 'Generate Podcast' passes every input value to submit_handler(), and a
+    Log component attached to the temporary log file sits below the button.
+
+    Returns:
+        None
+    """
+    with gr.Blocks() as iface:
+        # Title
+        gr.Markdown('# Podcast-LLM', elem_classes='text-center')
+
+        # Conversation Options Section
+        gr.Markdown('## Conversation Options')
+        with gr.Row():
+            topic_input = gr.Textbox(label='Topic')
+            qa_rounds_input = gr.Number(
+                label='Number of rounds of Q&A per section',
+                value=1,
+                interactive=True,
+                minimum=1,
+                maximum=10,
+                precision=0
+            )
+
+        # Mode Selection Section
+        gr.Markdown('## Mode of Operation')
+        mode_of_operation = gr.Radio(
+            choices=['research', 'context'],
+            label='Mode',
+            value='research',
+            interactive=True,
+            show_label=False
+        )
+
+        # Source Inputs Section (files and URLs, for context mode)
+        with gr.Row(equal_height=True):
+            source_files = gr.File(
+                label='Source files',
+                file_count='multiple',
+                type='filepath'
+            )
+            source_urls = gr.TextArea(label='Source URLs')
+
+        # Behavior Options Section
+        gr.Markdown('## Behaviour Options')
+        use_checkpoints_input = gr.Checkbox(
+            label='Use Checkpoints',
+            value=True
+        )
+        custom_config_file_input = gr.File(
+            label='Config file',
+            type='filepath'
+        )
+
+        # Output Options Section
+        gr.Markdown('## Output Options')
+        with gr.Row():
+            text_output_input = gr.Textbox(label='Text output')
+            audio_output_input = gr.Textbox(label='Audio output')
+
+        # Submit Button: the inputs list follows submit_handler's parameter order
+        submit_button = gr.Button('Generate Podcast')
+        submit_button.click(
+            fn=submit_handler,
+            inputs=[
+                topic_input,
+                mode_of_operation,
+                source_files,
+                source_urls,
+                qa_rounds_input,
+                use_checkpoints_input,
+                custom_config_file_input,
+                text_output_input,
+                audio_output_input
+            ],
+            outputs=[]
+        )
+
+        # Log Display: bound to the same temp file that submit_handler logs to
+        gr.Markdown('## System Log')
+        Log(temp_log_file, dark=True, xterm_font_size=12)
+
+    iface.launch()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/podcast_llm/text_to_speech.py b/podcast_llm/text_to_speech.py
@@ -145,7 +145,7 @@ def process_line_google(config: PodcastConfig, text: str, speaker: str):
 
     # Select the type of audio file you want returned
     audio_config = texttospeech.AudioConfig(
-        audio_encoding=texttospeech.AudioEncoding.MP3_64_KBPS,
+        audio_encoding=texttospeech.AudioEncoding.MP3,
         effects_profile_id=tts_settings['effects_profile_id']
     )