Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ sudo -E ./target/debug/voice-keyboard --test-stt

## Speech-to-Text Service

This application uses **Deepgram Flux**, the company's new turn‑taking STT API. The default WebSocket URL is `wss://api.preview.deepgram.com/v2/listen`.
This application uses **Deepgram Flux**, the company's new turn‑taking STT API. The default WebSocket URL is `wss://api.deepgram.com/v2/listen`.

## Command Line Options

Expand All @@ -89,7 +89,7 @@ OPTIONS:
--test-audio Test audio input and show levels
--test-stt Test speech-to-text functionality (default if no other mode specified)
--debug-stt Debug speech-to-text (print transcripts without typing)
--stt-url <URL> Custom STT service URL (default: wss://api.preview.deepgram.com/v2/listen)
--stt-url <URL> Custom STT service URL (default: wss://api.deepgram.com/v2/listen)
-h, --help Print help information
-V, --version Print version information
```
Expand All @@ -116,7 +116,7 @@ The application provides sophisticated real-time transcript updates:

## About Deepgram Flux (Early Access)

- **Endpoint**: `wss://api.preview.deepgram.com/v2/listen`
- **Endpoint**: `wss://api.deepgram.com/v2/listen`
- **What it is**: Flux is Deepgram's turn‑taking, low‑latency STT API designed for conversational experiences.
- **Authentication**: Send an `Authorization` header. Common forms:
- `Token <DEEPGRAM_API_KEY>` (what this app uses)
Expand Down
22 changes: 22 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Nix flake for this project: builds the crate in the flake's root directory
# with naersk and provides a development shell with the Rust toolchain.
{
# Flake inputs: naersk (Rust builder), nixpkgs (unstable channel) and
# flake-utils (per-system output helper).
inputs = {
naersk.url = "github:nix-community/naersk/master";
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
utils.url = "github:numtide/flake-utils";
};

# Outputs are generated once per default system via flake-utils'
# `eachDefaultSystem`, which calls the function below with each `system`.
outputs = { self, nixpkgs, utils, naersk }:
utils.lib.eachDefaultSystem (system:
let
# Package set instantiated for the system currently being evaluated.
pkgs = import nixpkgs { inherit system; };
# naersk's builder functions, instantiated against `pkgs`.
naersk-lib = pkgs.callPackage naersk { };
in
{
# NOTE(review): `defaultPackage` and `devShell` appear to be the legacy
# output names; newer Nix releases prefer `packages.default` and
# `devShells.default` - confirm against the Nix version targeted before
# renaming, since consumers may reference these attributes directly.

# Default package: the Rust crate located at the flake root.
defaultPackage = naersk-lib.buildPackage ./.;
# Development shell with the Rust toolchain, linting/formatting tools and
# pre-commit. `alsa-lib` is presumably needed for audio capture - TODO confirm.
devShell = with pkgs; mkShell {
buildInputs = [ cargo rustc rustfmt rust-analyzer pre-commit rustPackages.clippy alsa-lib ];
# Points RUST_SRC_PATH at the Rust standard library sources from nixpkgs
# (presumably so rust-analyzer can resolve std - verify).
RUST_SRC_PATH = rustPlatform.rustLibSrc;
};
}
);
}
13 changes: 7 additions & 6 deletions run.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash

# Voice Keyboard Runner Script
# This script runs the voice-keyboard with proper privilege handling
Expand All @@ -11,22 +11,23 @@

# Check if we're already running as root
if [ "$EUID" -eq 0 ]; then
echo "Error: Don't run this script as root. It will handle privileges automatically."
exit 1
echo "Error: Don't run this script as root. It will handle privileges automatically."
exit 1
fi

# Build the project first
echo "Building voice-keyboard..."
cargo build

if [ $? -ne 0 ]; then
echo "Build failed!"
exit 1
echo "Build failed!"
exit 1
fi

# Run with sudo -E to preserve environment variables
echo "Starting voice-keyboard with privilege dropping..."
echo "Note: This will create a virtual keyboard as root, then drop privileges for audio access."
echo ""

sudo -E ./target/debug/voice-keyboard "$@"
sudo -E ./target/debug/voice-keyboard "$@"

2 changes: 1 addition & 1 deletion src/stt_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use tokio_tungstenite::tungstenite::error::Error as WsError;
use tokio_tungstenite::{connect_async, tungstenite::Message};
use tracing::{debug, error, info};

pub const STT_URL: &str = "wss://api.preview.deepgram.com/v2/listen";
pub const STT_URL: &str = "wss://api.deepgram.com/v2/listen";

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WordInfo {
Expand Down
12 changes: 9 additions & 3 deletions src/virtual_keyboard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,9 @@ impl<H: KeyboardHardware> VirtualKeyboard<H> {
// Clear the current text tracking
self.current_text.clear();

// Add a space after the end of each turn
self.hardware.type_text(" ")?;

Ok(())
}

Expand Down Expand Up @@ -475,10 +478,11 @@ mod tests {
assert_eq!(kb.current_text, "hello");
assert!(!kb.hardware.enter_pressed);

// Finalize (should not press enter anymore)
// Finalize (should not press enter anymore but should add a space)
kb.finalize_transcript().unwrap();
assert_eq!(kb.current_text, "");
assert!(!kb.hardware.enter_pressed); // Should remain false
assert_eq!(kb.hardware.typed_chars, ['h', 'e', 'l', 'l', 'o', ' ']);
}

#[test]
Expand Down Expand Up @@ -610,12 +614,13 @@ mod tests {
assert_eq!(kb.current_text, "hello world");
assert!(!kb.hardware.enter_pressed);

// Finalize - should not press ENTER key
// Finalize - should not press ENTER key but should add a space
kb.finalize_transcript().unwrap();
assert_eq!(kb.current_text, "");
assert!(!kb.hardware.enter_pressed);
// Should not have backspaced anything for the enter command
assert_eq!(kb.hardware.backspace_count, 0);
assert_eq!(kb.hardware.typed_chars, ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', ' ']);
}

#[test]
Expand All @@ -627,12 +632,13 @@ mod tests {
assert_eq!(kb.current_text, "enter the room");
assert!(!kb.hardware.enter_pressed);

// Finalize - should not press ENTER key since "enter" is not the last word
// Finalize - should not press ENTER key since "enter" is not the last word but should add a space
kb.finalize_transcript().unwrap();
assert_eq!(kb.current_text, "");
assert!(!kb.hardware.enter_pressed);
// Should not have backspaced anything for the enter command
assert_eq!(kb.hardware.backspace_count, 0);
assert_eq!(kb.hardware.typed_chars, ['e', 'n', 't', 'e', 'r', ' ', 't', 'h', 'e', ' ', 'r', 'o', 'o', 'm', ' ']);
}

#[test]
Expand Down