-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils.py
66 lines (50 loc) · 1.73 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import gentle as gentle
def run_gentle(seg, transcript):
    """
    Force-align one segment's audio against its transcript with Gentle.

    Steps:
    1. cut the segment's time range out of the full audio
    2. export the cut to a temporary WAV file
    3. run Gentle's forced aligner on that file and the transcript
    4. delete the temporary WAV file (even if a step above fails)
    5. fill in times for unaligned words and shift every word so its
       timestamps are relative to the whole audio file

    Parameters
    ---------
    seg : Segment object to align with Gentle; must expose `audio_file`,
        `start_audio` and `end_audio`
    transcript : string holding the relevant transcript for this segment

    Returns
    -------
    list of Gentle Word objects whose `start`/`end` have been offset by
    `seg.start_audio`
    """
    # Local import so this function does not rely on a module-level import.
    import tempfile

    # Bounds are scaled by 1000 before slicing -- presumably seconds converted
    # to the milliseconds a pydub AudioSegment is indexed by (TODO confirm).
    audio_cut = seg.audio_file[1000 * seg.start_audio : 1000 * seg.end_audio]

    # A unique temp path avoids clashes between concurrent runs, which a fixed
    # "temp_audio.wav" in the working directory could not.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_cut.export(temp_path, format="wav")

        # run Gentle
        resources = gentle.Resources()
        with gentle.resampled(temp_path) as wavfile:
            aligner = gentle.ForcedAligner(resources, transcript)
            result = aligner.transcribe(wavfile).words
    finally:
        # delete cut audio file, whether or not alignment succeeded
        os.remove(temp_path)

    # fix unaligned-word start/end time data
    fix_unaligned(result, len(audio_cut) / 1000)

    # put gentle timestamps in relation to entire file
    for word in result:
        word.start += seg.start_audio
        word.end += seg.start_audio

    return result
def fix_unaligned(gentle_output, audio_file_length):
    """
    Give approximate start/end times to unaligned words in the Gentle output.

    Words are modified in place; nothing is returned. An unaligned word
    (one whose `success()` is false) is bracketed by its nearest aligned
    neighbours: its start becomes the end of the closest aligned word before
    it (or 0 if there is none), and its end becomes the start of the closest
    aligned word after it (or `audio_file_length` if there is none).
    Consecutive unaligned words therefore share the same interval.

    Parameters
    ----------
    gentle_output : list of Word objects returned by Gentle
    audio_file_length : length of the aligned audio, in the same unit as the
        words' start/end values
    """
    # Forward pass: carry the end of the last aligned word forward as the
    # start of every unaligned word that follows it.
    last_aligned_end = 0
    for word in gentle_output:
        if word.success():
            last_aligned_end = word.end
        else:
            word.start = last_aligned_end

    # Backward pass: carry the start of the next aligned word backward as the
    # end of every unaligned word that precedes it.
    next_aligned_start = audio_file_length
    for word in reversed(gentle_output):
        if word.success():
            next_aligned_start = word.start
        else:
            word.end = next_aligned_start