RobotecAI
diff --git a/examples/s2s/asr.py
Lines changed: 119 additions & 0 deletions b/examples/s2s/asr.py
Lines changed: 119 additions & 0 deletions
diff --git a/examples/s2s/conversational.py
Lines changed: 171 additions & 0 deletions b/examples/s2s/conversational.py
Lines changed: 171 additions & 0 deletions
diff --git a/examples/s2s/run.sh
Lines changed: 66 additions & 0 deletions b/examples/s2s/run.sh
Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,119 @@
+# Copyright (C) 2024 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import signal
+import time
+
+import rclpy
+from rai.agents import VoiceRecognitionAgent
+from rai.communication.sound_device.api import SoundDeviceConfig
+
+from rai_asr.models import LocalWhisper, OpenWakeWord, SileroVAD
+
+VAD_THRESHOLD = 0.8  # Note that this might be different depending on your device
+OWW_THRESHOLD = 0.1  # Note that this might be different depending on your device
+
+VAD_SAMPLING_RATE = 16000  # Or 8000
+DEFAULT_BLOCKSIZE = 1280
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(
+        description="Voice Activity Detection and Wake Word Detection Configuration",
+        allow_abbrev=True,
+    )
+
+    # Predefined arguments
+    parser.add_argument(
+        "--vad-threshold",
+        type=float,
+        default=VAD_THRESHOLD,
+        help="Voice Activity Detection threshold (default: 0.5)",
+    )
+    parser.add_argument(
+        "--oww-threshold",
+        type=float,
+        default=OWW_THRESHOLD,
+        help="OpenWakeWord threshold (default: 0.1)",
+    )
+    parser.add_argument(
+        "--vad-sampling-rate",
+        type=int,
+        choices=[8000, 16000],
+        default=VAD_SAMPLING_RATE,
+        help="VAD sampling rate (default: 16000)",
+    )
+    parser.add_argument(
+        "--block-size",
+        type=int,
+        default=DEFAULT_BLOCKSIZE,
+        help="Audio block size (default: 1280)",
+    )
+    parser.add_argument(
+        "--device-name",
+        type=str,
+        default="default",
+        help="Microphone device name (default: 'default')",
+    )
+
+    # Use parse_known_args to ignore unknown arguments
+    args, unknown = parser.parse_known_args()
+
+    if unknown:
+        print(f"Ignoring unknown arguments: {unknown}")
+
+    return args
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+
+    microphone_configuration = SoundDeviceConfig(
+        stream=True,
+        channels=1,
+        device_name=args.device_name,
+        block_size=args.block_size,
+        consumer_sampling_rate=args.vad_sampling_rate,
+        dtype="int16",
+        device_number=None,
+        is_input=True,
+        is_output=False,
+    )
+    vad = SileroVAD(args.vad_sampling_rate, args.vad_threshold)
+    oww = OpenWakeWord("hey jarvis", args.oww_threshold)
+    whisper = LocalWhisper("tiny", args.vad_sampling_rate)
+    # whisper = OpenAIWhisper("whisper-1", args.vad_sampling_rate, "en")
+
+    rclpy.init()
+    ros2_name = "rai_asr_agent"
+
+    agent = VoiceRecognitionAgent(microphone_configuration, ros2_name, whisper, vad)
+    agent.add_detection_model(oww, pipeline="record")
+
+    agent.run()
+
+    def cleanup(signum, frame):
+        print("\nCustom handler: Caught SIGINT (Ctrl+C).")
+        print("Performing cleanup")
+        # Optionally exit the program
+        agent.stop()
+        rclpy.shutdown()
+        exit(0)
+
+    signal.signal(signal.SIGINT, cleanup)
+
+    print("Runnin")
+    while True:
+        time.sleep(1)
@@ -0,0 +1,171 @@
+# Copyright (C) 2024 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import logging
+import signal
+import time
+from queue import Queue
+from threading import Event, Thread
+from typing import Dict, List
+
+import rclpy
+from langchain_core.callbacks import BaseCallbackHandler
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from rai.agents.base import BaseAgent
+from rai.communication import BaseConnector
+from rai.communication.ros2.api import IROS2Message
+from rai.communication.ros2.connectors import ROS2HRIConnector, TopicConfig
+from rai.utils.model_initialization import get_llm_model
+
+from rai_interfaces.msg import HRIMessage as InterfacesHRIMessage
+
+# NOTE: the Agent code included here is temporary until a dedicated speech agent is created
+# it can still serve as a reference for writing your own RAI agents
+
+
+class LLMTextHandler(BaseCallbackHandler):
+    def __init__(self, connector: ROS2HRIConnector):
+        self.connector = connector
+        self.token_buffer = ""
+
+    def on_llm_new_token(self, token: str, **kwargs):
+        self.token_buffer += token
+        if len(self.token_buffer) > 100 or token in [".", "?", "!", ",", ";", ":"]:
+            self.connector.send_all_targets(AIMessage(content=self.token_buffer))
+            self.token_buffer = ""
+
+    def on_llm_end(
+        self,
+        response,
+        *,
+        run_id,
+        parent_run_id=None,
+        **kwargs,
+    ):
+        if self.token_buffer:
+            self.connector.send_all_targets(AIMessage(content=self.token_buffer))
+            self.token_buffer = ""
+
+
+class S2SConversationalAgent(BaseAgent):
+    def __init__(self, connectors: Dict[str, BaseConnector]):  # type: ignore
+        super().__init__(connectors=connectors)
+        self.message_history: List[HumanMessage | AIMessage | SystemMessage] = [
+            SystemMessage(
+                content="Pretend you are a robot. Answer as if you were a robot."
+            )
+        ]
+        self.speech_queue: Queue[InterfacesHRIMessage] = Queue()
+
+        self.llm = get_llm_model(model_type="complex_model", streaming=True)
+        self._setup_ros_connector()
+        self.main_thread = None
+        self.stop_thread = Event()
+
+    def run(self):
+        logging.info("Running S2SConversationalAgent")
+        self.main_thread = Thread(target=self._main_loop)
+        self.main_thread.start()
+
+    def _main_loop(self):
+        while not self.stop_thread.is_set():
+            time.sleep(0.01)
+            speech = ""
+            while not self.speech_queue.empty():
+                speech += "".join(self.speech_queue.get().text)
+                logging.info(f"Received human speech {speech}!")
+            if speech != "":
+                self.message_history.append(HumanMessage(content=speech))
+                assert isinstance(self.connectors["ros2"], ROS2HRIConnector)
+                # ai_answer = AIMessage(content="Yes, I am Jar Jar Binks")
+                # self.connectors["ros2"].send_all_targets(ai_answer)
+                ai_answer = self.llm.invoke(
+                    speech,
+                    config={"callbacks": [LLMTextHandler(self.connectors["ros2"])]},
+                )
+                self.message_history.append(ai_answer)  # type: ignore
+
+    def _on_from_human(self, msg: IROS2Message):
+        assert isinstance(msg, InterfacesHRIMessage)
+        logging.info("Received message from human: %s", msg.text)
+        self.speech_queue.put(msg)
+
+    def _setup_ros_connector(self):
+        self.connectors["ros2"] = ROS2HRIConnector(
+            sources=[
+                (
+                    "/from_human",
+                    TopicConfig(
+                        "rai_interfaces/msg/HRIMessage",
+                        is_subscriber=True,
+                        source_author="human",
+                        subscriber_callback=self._on_from_human,
+                    ),
+                )
+            ],
+            targets=[
+                (
+                    "/to_human",
+                    TopicConfig(
+                        "rai_interfaces/msg/HRIMessage",
+                        source_author="ai",
+                        is_subscriber=False,
+                    ),
+                )
+            ],
+        )
+
+    def stop(self):
+        assert isinstance(self.connectors["ros2"], ROS2HRIConnector)
+        self.connectors["ros2"].shutdown()
+        self.stop_thread.set()
+        if self.main_thread is not None:
+            self.main_thread.join()
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(
+        description="Text To Speech Configuration",
+        allow_abbrev=True,
+    )
+
+    # Use parse_known_args to ignore unknown arguments
+    args, unknown = parser.parse_known_args()
+
+    if unknown:
+        print(f"Ignoring unknown arguments: {unknown}")
+
+    return args
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    rclpy.init()
+    agent = S2SConversationalAgent(connectors={})
+    agent.run()
+
+    def cleanup(signum, frame):
+        print("\nCustom handler: Caught SIGINT (Ctrl+C).")
+        print("Performing cleanup")
+        # Optionally exit the program
+        agent.stop()
+        rclpy.shutdown()
+        exit(0)
+
+    signal.signal(signal.SIGINT, cleanup)
+
+    print("Runnin")
+    while True:
+        time.sleep(1)
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Directory where the scripts are located
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Array to store PIDs of background processes
+declare -a PIDS
+
+# Launch one Python script in the background and record its PID in PIDS.
+# Arguments: $1 - path of the script; all further arguments are passed on to it.
+run_script() {
+    local target="$1"
+
+    python3 "$target" "${@:2}" &
+    PIDS+=("$!")
+}
+
+# SIGINT (Ctrl+C) handler: forward the signal to every recorded process that
+# is still alive, wait for all children, then exit with status 0.
+handle_sigint() {
+    local child
+
+    printf '\n%s\n' "Received SIGINT, forwarding to all running Python processes..."
+
+    for child in "${PIDS[@]}"; do
+        # Skip processes that are already gone
+        kill -0 "$child" 2>/dev/null || continue
+        echo "Sending SIGINT to process $child"
+        kill -s INT "$child"
+    done
+
+    echo "Waiting for all processes to exit..."
+    wait
+
+    echo "All processes have exited. Cleaning up and exiting."
+    exit 0
+}
+
+# Main logic: start every Python script found under SCRIPT_DIR in the
+# background and wait for all of them to finish.
+# Arguments: forwarded unchanged to every script.
+# Returns: 1 if at least one script exited with a non-zero status, 0 otherwise.
+#          Exits with status 1 when no Python script is found.
+main() {
+    # Set up trap for SIGINT (Ctrl+C)
+    trap handle_sigint SIGINT
+
+    # Find all Python scripts in the scripts directory (find searches recursively)
+    local -a scripts
+    mapfile -t scripts < <(find "$SCRIPT_DIR" -name "*.py")
+
+    # If no scripts found, exit
+    if [ ${#scripts[@]} -eq 0 ]; then
+        echo "No Python scripts found in $SCRIPT_DIR"
+        exit 1
+    fi
+
+    echo "Found ${#scripts[@]} Python scripts in $SCRIPT_DIR"
+
+    # Run all scripts in parallel with all arguments properly quoted
+    local script
+    for script in "${scripts[@]}"; do
+        run_script "$script" "$@"
+    done
+
+    echo "All scripts are running in the background. Press Ctrl+C to stop them."
+
+    # A bare `wait` always returns 0, so wait for each PID individually to
+    # learn whether any script failed before claiming success.
+    local pid
+    local failed=0
+    for pid in "${PIDS[@]}"; do
+        if ! wait "$pid"; then
+            echo "Process $pid exited with a non-zero status."
+            failed=$((failed + 1))
+        fi
+    done
+
+    if [ "$failed" -gt 0 ]; then
+        echo "$failed of ${#PIDS[@]} scripts failed."
+        return 1
+    fi
+
+    echo "All scripts completed successfully."
+}
+
+# Call main with all arguments properly quoted
+main "$@"