Skip to content

Commit

Permalink
Merge pull request #13 from jquagga/faster_whisper
Browse files Browse the repository at this point in the history
Add a faster_whisper function
  • Loading branch information
jquagga authored Apr 6, 2024
2 parents ad32740 + f2a790d commit b2653a4
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
6 changes: 4 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ services:
hostname: ttt
restart: unless-stopped
volumes:
- ../trunk-recorder-config/media/transcribe:/app/media/transcribe
- ./media/transcribe:/app/media/transcribe
- ./destinations.csv:/app/destinations.csv:ro
- ./models:/app/models #faster_whisper models
user: "1000"
environment:
- TZ=America/New_York
- TTT_WHISPERCPP_URL=${TTT_WHISPERCPP_URL}
- TTT_DEEPGRAM_KEY=${TTT_DEEPGRAM_KEY}
- TTT_DEEPGRAM_KEY=${TTT_DEEPGRAM_KEY}
44 changes: 36 additions & 8 deletions ttt.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@ def transcribe_call(destinations):
calljson = json.loads(calljson)

# Send the json and audiofile to a function to transcribe
# If TTT_DEEPGRAM_KEY is set, use deepgram, else whispercpp
if deepgram_key := os.environ.get("TTT_DEEPGRAM_KEY", False):
# If TTT_DEEPGRAM_KEY is set, use deepgram, else
# if TTT_WHISPER_URL is set, use whisper.cpp else
# fasterwhisper deepgram_key := whispercpp_url :=
if os.environ.get("TTT_DEEPGRAM_KEY", False):
calljson = transcribe_deepgram(calljson, audiofile)
else:
elif os.environ.get("TTT_WHISPERCPP_URL", False):
calljson = transcribe_whispercpp(calljson, audiofile)
else:
calljson = transcribe_fasterwhisper(calljson, audiofile)

# Ok, we have text back, send for notification
send_notifications(calljson, destinations)
Expand All @@ -50,11 +54,6 @@ def transcribe_call(destinations):
def transcribe_whispercpp(calljson, audiofile):
whisper_url = os.environ.get("TTT_WHISPERCPP_URL", "http://whisper:8080")

# Check if we are running behind
queue_time = float(datetime.now().timestamp()) - calljson["start_time"]
if queue_time > 180:
print("Queue exceeds 3 minutes")

# Now send the files over to whisper for transcribing
files = {
"file": (None, audiofile.read_bytes()),
Expand All @@ -77,6 +76,35 @@ def transcribe_whispercpp(calljson, audiofile):
return calljson


def _get_fasterwhisper_model():
    """Build the faster-whisper model once and reuse it across calls.

    The original code constructed a new WhisperModel on every
    transcription, re-loading the (large) model weights per audio file.
    The built model is memoized on the function object so repeated calls
    are cheap.

    Returns:
        WhisperModel: a CPU/int8 model, downloaded into ./models
        (the docker-compose file mounts ./models:/app/models for this).
    """
    # Imported lazily so deployments that use deepgram or whisper.cpp
    # do not need the faster_whisper package installed.
    from faster_whisper import WhisperModel

    model = getattr(_get_fasterwhisper_model, "_model", None)
    if model is None:
        # Model is selectable via the environment; distil-large-v3 is the default.
        model_size = os.environ.get("TTT_FASTERWHISPER_MODEL", "distil-large-v3")
        # CPU with INT8 quantization. For GPU, use device="cuda" with
        # compute_type="float16" (FP16) or "int8_float16" (quantized).
        model = WhisperModel(
            model_size, device="cpu", compute_type="int8", download_root="models"
        )
        _get_fasterwhisper_model._model = model
    return model


def transcribe_fasterwhisper(calljson, audiofile):
    """Transcribe an audio file locally with faster-whisper.

    Args:
        calljson: dict describing the call; the transcript is written
            into calljson["text"].
        audiofile: pathlib.Path to the audio file to transcribe.

    Returns:
        dict: calljson with the "text" key set to the transcript.
    """
    model = _get_fasterwhisper_model()

    # faster-whisper wants a file path (str), not bytes; cast from pathlib.Path.
    # The returned `segments` is a generator — consuming it below performs
    # the actual transcription. The info tuple element is unused here.
    segments, _info = model.transcribe(
        str(audiofile), beam_size=5, vad_filter=True
    )

    calljson["text"] = "".join(segment.text for segment in segments)

    return calljson


def transcribe_deepgram(calljson, audiofile):
deepgram_key = os.environ.get("TTT_DEEPGRAM_KEY")
headers = {
Expand Down

0 comments on commit b2653a4

Please sign in to comment.