Skip to content

Commit

Permalink
Add an endpoint for auto-labeling karaoke lyrics with AI
Browse files Browse the repository at this point in the history
  • Loading branch information
m-danya committed Apr 16, 2024
1 parent 51d8339 commit a8598c6
Show file tree
Hide file tree
Showing 6 changed files with 338 additions and 21 deletions.
41 changes: 21 additions & 20 deletions accompanist/collection/recognizer.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
from functools import lru_cache
from pathlib import Path
from typing import List

import whisper
import yaml


class Timestamper:
def __init__(self, data_path: str) -> None:
self.data_path = data_path
class LyricsTimestamper:
MODEL_TYPE = "tiny"
DEVICE = "cuda"

with open(self.data_path, "r") as config:
model_cfg = yaml.safe_load(config)
model_type = model_cfg["type"]
device = model_cfg["device"]
self.model = whisper.load_model(model_type).to(device)
def __init__(self) -> None:
self.model = whisper.load_model(self.MODEL_TYPE).to(self.DEVICE)

def _levenshtein_distance(self, s1: str, s2: str) -> int:
@lru_cache(maxsize=None)
Expand All @@ -27,18 +24,20 @@ def helper(i, j):

return helper(len(s1), len(s2))

def get_line_timestamps(
self, path_mp3: str, path_text: str
) -> List[dict[str, str]]:
with open(path_text, "r") as f:
text_lines = f.readlines()
def get_karaoke_lyrics(self, path_mp3: Path, lyrics: str) -> List[dict[str, str]]:
# TODO: standardize lyrics preprocessing
lyrics_lines = [
line.strip()
for line in lyrics.split("\n")
if line.strip() and not line.startswith("[")
]

transcript = self.model.transcribe(word_timestamps=True, audio=path_mp3)
transcript = self.model.transcribe(word_timestamps=True, audio=str(path_mp3))

output_data = []
lyrics_karaoke = []
segments = transcript["segments"]
prev_n = 0
for line in text_lines:
for line in lyrics_lines:
last_word = line[-1]
curr_n = len(line)
curr_segments = segments[prev_n:curr_n]
Expand All @@ -47,13 +46,15 @@ def get_line_timestamps(
min_lev_value = 1000000
for segment in curr_segments:
curr_word = segment["words"][0]["word"]
curr_end = segments["words"][0]["end"]
curr_end = segment["words"][0]["end"]
curr_lev_value = self._levenshtein_distance(curr_word, last_word)
if min_lev_value > curr_lev_value:
min_lev_value = curr_lev_value
min_lev_wordend = curr_end

prev_n = curr_n
output_data.append({"line": line, "end_ts": min_lev_wordend})
lyrics_karaoke.append({"line": line, "end_ts": min_lev_wordend})

return output_data
# TODO: assert that lines are sorted by "end_ts"
print(lyrics_karaoke)
return lyrics_karaoke
7 changes: 7 additions & 0 deletions accompanist/collection/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ async def update_track_lyrics(track_id: int):
return track


# TODO: rename
@router.post("/tracks/{track_id}/lyrics_timestamps")
async def update_track_lyrics_timestamps(track_id: int):
    """Handle POST /tracks/{track_id}/lyrics_timestamps.

    Delegates to ``service.update_lyrics_timestamps`` and returns its
    result unchanged as the response.
    """
    return await service.update_lyrics_timestamps(track_id)


@router.post("/update_lyrics_dev", include_in_schema=False)
async def update_all_tracks_lyrics():
tracks = await TrackDAO.find_all()
Expand Down
13 changes: 13 additions & 0 deletions accompanist/collection/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from accompanist.celery.tasks import process_album_task
from accompanist.collection.dao import TrackDAO
from accompanist.collection.recognizer import LyricsTimestamper
from accompanist.collection.schema import AlbumInfoFromUser, TrackUpdateRequest
from accompanist.collection.service_genius import get_lyrics_from_genius
from accompanist.config import settings


async def add_album(album_info: AlbumInfoFromUser):
Expand All @@ -19,3 +21,14 @@ async def update_track_lyrics_by_id(track_id: int):
update_request = TrackUpdateRequest(lyrics=lyrics, genius_url=genius_url)
track = await TrackDAO.update(track.id, update_request)
return track


# TODO: rename
async def update_lyrics_timestamps(track_id: int):
    """Generate karaoke line timestamps for a track and store them.

    Loads the track, runs ``LyricsTimestamper.get_karaoke_lyrics`` on the
    vocals file at ``settings.STORAGE_PATH / track.filename_vocals`` together
    with the track's lyrics, and saves the result via ``TrackDAO.update`` as
    ``lyrics_karaoke``.

    Args:
        track_id: Identifier of the track to process.

    Returns:
        Whatever ``TrackDAO.update`` returns for the updated track.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    import asyncio

    track = await TrackDAO.get_with_artist(track_id)
    vocals_mp3_path = settings.STORAGE_PATH / track.filename_vocals
    # Read the attribute here so the worker thread only receives plain values.
    lyrics = track.lyrics

    def _recognize():
        # Both the model load in LyricsTimestamper.__init__ and the
        # transcription are synchronous calls; running them directly inside
        # this coroutine would block the event loop until they finish.
        timestamper = LyricsTimestamper()
        return timestamper.get_karaoke_lyrics(vocals_mp3_path, lyrics)

    lyrics_karaoke = await asyncio.to_thread(_recognize)
    update_request = TrackUpdateRequest(lyrics_karaoke=lyrics_karaoke)
    return await TrackDAO.update(track.id, update_request)
38 changes: 38 additions & 0 deletions frontend/src/components/TrackComponent.vue
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@
>
Разметить караоке-текст
</button>
<div class="updateAILyricsButtonDiv">
<div v-if="updateAIKaraokeLyricsButtonIsLoading">
<SpinnerComponent size="20px" />
</div>
<button
v-else
@click="updateAIKaraokeLyrics"
class="updateLyricsButton"
>
Разметить караоке-текст с помощью ИИ
</button>
</div>
<div v-if="isKaraokeInRecordingMode">
<RecordTimecodesComponent
:mp3Url="getStaticUrl(track.filename_original)"
Expand Down Expand Up @@ -176,6 +188,7 @@ import { faFileLines } from "@fortawesome/free-regular-svg-icons";
const backendAddress = inject("backendAddress");
const getStaticUrl = inject("getStaticUrl");
const updateLyricsButtonIsLoading = ref(false);
const updateAIKaraokeLyricsButtonIsLoading = ref(false);
const TrackPageStates = {
ShowSpinnerInsteadOfLyrics: 0,
ShowPlainLyrics: 1,
Expand Down Expand Up @@ -298,6 +311,31 @@ async function updateLyrics() {
}
}
/**
 * Ask the backend to generate karaoke timestamps for the current track.
 *
 * Sends a POST to `/tracks/{id}/lyrics_timestamps`; on success replaces
 * `track.value` with the JSON response. Any failure is logged and reported
 * with an alert. The loading flag is raised first and always cleared at the end.
 */
async function updateAIKaraokeLyrics() {
  const failureMessage = "Failed to update karaoke lyrics with AI";
  try {
    // TODO: rename variables
    updateAIKaraokeLyricsButtonIsLoading.value = true;
    const endpoint = `${backendAddress}/tracks/${track.value.id}/lyrics_timestamps`;
    const requestOptions = {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
    };
    const res = await fetch(endpoint, requestOptions);
    if (!res.ok) {
      throw new Error(failureMessage);
    }
    track.value = await res.json();
  } catch (err) {
    console.error(err);
    alert(failureMessage);
  } finally {
    updateAIKaraokeLyricsButtonIsLoading.value = false;
  }
}
function goToNextTrack() {
// `number_in_album` is indexed from 1
let nextTrackId = album.value.tracks[track.value.number_in_album + 1 - 1].id;
Expand Down
Loading

0 comments on commit a8598c6

Please sign in to comment.